* Set the working directory; all data files below are referenced relative to it.
* NOTE(review): this is a machine-specific absolute path -- edit it before running elsewhere.
cd "C:\Users\jedwab\Desktop\Replication Files for The Employment Profile of Cities around the World WD\Stata"

***********************************************************************************

* this program generates a coefficient and standard error (with stars) 
* to report on a linear combination:
program drop _all
program proglincom, rclass
    * Usage:   proglincom <lincom expression and options>
    * Returns: r(coefftxt) = estimate formatted %9.2f followed by significance stars
    *          r(setxt)    = standard error formatted %9.2f inside square brackets
    * Stars:   *** if p < 1%, ** if p < 5%, * if p < 10% (two-sided t test using r(df)).
    * The data in memory are left untouched: everything is held in temporary scalars,
    * so the program also works when a variable called pval/pvalcat/stars already
    * exists or when the dataset has no observations.
    lincom `0'
    tempname est se df pval
    * Copy lincom's results before any other command can overwrite r()
    scalar `est' = r(estimate)
    scalar `se'  = r(se)
    scalar `df'  = r(df)
    return local setxt = "[" + string(`se',"%9.2f") + "]"
    * Two-sided p-value expressed in percent; missing if r(df) is missing
    scalar `pval' = tprob(`df', abs(`est'/`se'))*100
    * A missing p-value is larger than any number, so it yields no stars
    local startxt ""
    if (`pval' < 1) {
        local startxt "***"
    }
    else if (`pval' < 5) {
        local startxt "**"
    }
    else if (`pval' < 10) {
        local startxt "*"
    }
    return local coefftxt = string(`est',"%9.2f") + "`startxt'"
end 
* this program generates a coefficient and standard error (with stars) 
* to report on a non-linear combination:
program prognlcom, rclass
    * Usage:   prognlcom <nlcom expression and options>
    * Returns: r(coefftxt) = first estimate formatted %9.2f followed by significance stars
    *          r(setxt)    = its standard error formatted %9.2f inside square brackets
    * Stars:   *** if p < 1%, ** if p < 5%, * if p < 10% (two-sided t test using r(df_r)).
    * Only element [1,1] of r(b) and r(V) is used, i.e. the first expression.
    * Temporary names are used throughout so that neither the data in memory nor
    * user matrices named A or B are created, overwritten or dropped.
    nlcom `0'
    tempname b V est se df pval
    * Copy nlcom's results before any other command can overwrite r()
    scalar `df' = r(df_r)
    matrix `b' = r(b)
    matrix `V' = r(V)
    scalar `est' = `b'[1,1]
    scalar `se'  = sqrt(`V'[1,1])
    local coefftxttmp = string(`est',"%9.2f")
    return local setxt = "[" + string(`se',"%9.2f") + "]"
    * Two-sided p-value expressed in percent; missing if r(df_r) is missing
    scalar `pval' = tprob(`df', abs(`est'/`se'))*100
    * A missing p-value is larger than any number, so it yields no stars
    local startxt ""
    if (`pval' < 1) {
        local startxt "***"
    }
    else if (`pval' < 5) {
        local startxt "**"
    }
    else if (`pval' < 10) {
        local startxt "*"
    }
    return local coefftxt = "`coefftxttmp'" + "`startxt'"
    display "`coefftxttmp'"
end

*********************************************************************************************************************************************************
*********************************************************************************************************************************************************

* We create the main files:
* LACdata

*********************
*********************
**# DATA CREATION #1
*********************
*********************

****************************************
** BASELINE DATA AT THE COUNTRY LEVEL **
****************************************

* Baseline country-level panel; the input file is created in the subfolder "Ipums and Other Files"
use main_dataset_meat, clear
tabulate year
* Variables that are not needed for the final analysis
drop region_* continent_* rrents NRX* mfgpanel* mfg_gdp* serv*
* Identifiers
rename country country_wb
generate ccodeyr = ccode + " " + string(year)
* Dummy if Latin America and the Caribbean (based on the original continent coding)
generate LAC = (continent == "LAC")
* Short region names (empty for any other region)
generate regionshort = cond(region == "East Asia & Pacific", "EAP", ///
    cond(region == "Latin America & Caribbean", "LAC", ///
    cond(region == "Middle East & North Africa", "MENA", ///
    cond(region == "South Asia", "SA", ///
    cond(region == "Sub-Saharan Africa", "SSA", "")))))
* Asia and the Pacific are pooled into one continent
replace continent = "Asia-Pacific" if inlist(continent, "Asia", "Pacific")
* Aggregate regions: South Asia and East Asia & Pacific are pooled into "ASIA"
generate agregion = regionshort
replace agregion = "ASIA" if inlist(regionshort, "SA", "EAP")
* Log of country population and its square
generate lpop = log(pop)
generate lpop_sq = lpop*lpop
order ccode year ccodeyr country_wb country_wup agregion region regionshort continent LAC pop lpop* urbrate primacy_rate 
* For each trade share we keep only the moving average using two leads and lags (MA2),
* which takes over the base variable name:
*   merch_x_gdp = share of merchandise exports in GDP
*   mfg_sh_x    = share of manufacturing exports in total merchandise exports
*   agri_sh_x   = share of agricultural exports in total merchandise exports
foreach stub in merch_x_gdp mfg_sh_x agri_sh_x {
    correlate `stub'*
    drop `stub' `stub'_ma1
    rename `stub'_ma2 `stub'
}
* Share of mining/fuel in total merchandise exports: same treatment, but renamed
correlate min_sh_x*
drop min_sh_x min_sh_x_ma1
rename min_sh_x_ma2 minfuel_sh_x
label var minfuel_sh_x "Share of fuel & mining x in merch x (%) in t (MA2)"
sort ccode year 
* Primacy rate: descriptive checks only
codebook primacy_rate if year >= 1960 
summarize primacy_rate, detail
* Cash crop variables are not used
drop cash_sh*
* Labels and final variable order
label var ccodeyr "Country code-year"
label var agregion "World region (aggregate)"
label var regionshort  "World region (short name)"
label var LAC "Dummy if LAC region"
label var lpop "Log of Population (000s) in t"
label var lpop_sq "Square of log population"
order country* ccode* year region* continent LAC *pop*
save LACdata, replace

**************************************
*** CORRECTING SOME MISSING VALUES ***
**************************************

* Some of the core variables still have missing values.
* They are filled in by hand with values found through simple Google searches.

*** Missings in the share of manufacturing exports in total exports ***

use LACdata, clear
* Bahamas: 3.5 for every year up to 1970
replace mfg_sh_x = 3.5 if ccode == "BHS" & year <= 1970
* Eritrea: 28 wherever the share is missing
replace mfg_sh_x = 28 if ccode == "ERI" & mfg_sh_x == .

* Manufacturing exports as a share of GDP = (merch. exports / GDP) x (mfg share of merch. exports)
generate mfg_sh_gdp =  merch_x_gdp/100*mfg_sh_x 
save LACdata, replace

*** Missings in the share of mining/fuel exports in exports ***

* Issue for the Bahamas in 1960-1970:
* the high share is due to re-exports from oil refining.
* Fuel exports are set to 0 and only "ores and metals exports" from the source below are used.
use LACdata, clear
codebook minfuel* if year >= 1960
tabulate ccodeyr if year >= 1960 & minfuel_sh_x == .

* Ores and metals exports (% of merchandise exports), used to correct the Bahamas observations
* Source: World Development Indicators (WDI) database of the World Bank
* Last accessed: 02-02-2021
* Variable: Ores and metals exports (% of merchandise exports)
clear
import excel "Data_Extract_From_WDI_Database_Archives_(beta) (7).xlsx", sheet("Data") firstrow
* One row per country, then one row per country-year
collapse (mean) y*, by(ccode)
drop if ccode == ""
reshape long y, i(ccode) j(year)
rename y sh
sort ccode year
* Lags are created from 5 down to 1 and leads from 1 up to 5, so that after -order-
* the variables run lag5 ... lag1 sh lead1 ... lead5 and can be addressed as a range
forvalues k = 5(-1)1 {
    by ccode (year): generate lag`k'sh = sh[_n-`k']
}
forvalues k = 1/5 {
    by ccode (year): generate lead`k'sh = sh[_n+`k']
}
order ccode year lag* sh* lead*
* Moving averages with windows of 0 to 5 leads and lags
generate min_ma0 = sh
forvalues k = 1/5 {
    egen min_ma`k' = rmean(sh lag`k'sh-lead`k'sh)
}
correlate min_ma*
* Keep the 5-lead/lag average, only for years ending in 0 or 5
keep if inlist(real(substr(string(year),4,1)), 0, 5)
keep ccode year min_ma5
rename min_ma5 min_bhs
sort ccode year
save min_bhs, replace

* Overwrite the Bahamas mining/fuel share in the main data
use LACdata, clear
sort ccode year
merge ccode year using min_bhs
tabulate _merge
* NOTE(review): rows found only in min_bhs (_merge == 2) are not dropped here,
* unlike in later merges -- confirm this is intended.
drop _merge
summarize minfuel_sh_x if ccode == "BHS"
summarize min_bhs if ccode == "BHS"
replace minfuel_sh_x = min_bhs if ccode == "BHS"
drop min_bhs 
sort ccode year
save LACdata, replace

*** Missings in the share of merchandise exports in GDP ***

* Which country-years still lack the share of merchandise exports in GDP?
use LACdata, clear
codebook merch_x_gdp if year >= 1960
tabulate ccodeyr if year >= 1960 & merch_x_gdp == .
* Issues for Eritrea and Somalia *

* We replace these with the values from the WDI database

* For Somalia
* Source: World Development Indicators (WDI) database of the World Bank
* Last accessed: 02-02-2021
import excel "Data_Extract_From_WDI_Database_Archives_(beta) (9).xlsx", sheet("Data") firstrow clear
* One row per country, then one row per country-year
collapse (mean) y*, by(ccode)
drop if ccode == ""
reshape long y, i(ccode) j(year)
rename y sh
sort ccode year
* Lags 5..1 and leads 1..5, created in this order so that the range
* lag`k'sh-lead`k'sh is contiguous after -order-
forvalues k = 5(-1)1 {
    by ccode (year): generate lag`k'sh = sh[_n-`k']
}
forvalues k = 1/5 {
    by ccode (year): generate lead`k'sh = sh[_n+`k']
}
order ccode year lag* sh* lead*
* Moving averages with windows of 0 to 5 leads and lags
generate som_ma0 = sh
forvalues k = 1/5 {
    egen som_ma`k' = rmean(sh lag`k'sh-lead`k'sh)
}
correlate som_ma*
* Keep the 5-lead/lag average, only for years ending in 0 or 5
keep if inlist(real(substr(string(year),4,1)), 0, 5)
keep ccode year som_ma5
rename som_ma5 merch_x_gdp_som
sort ccode year
save merch_x_gdp_som, replace

* Fill in Somalia where the main series is missing
use LACdata, clear
sort ccode year
merge ccode year using merch_x_gdp_som
tabulate _merge
drop _merge
summarize merch_x_gdp_som if ccode == "SOM" & year >= 1995
summarize merch_x_gdp if ccode == "SOM" & year >= 1995
replace merch_x_gdp = merch_x_gdp_som if ccode == "SOM" & merch_x_gdp == .
drop merch_x_gdp_som
codebook merch_x_gdp if year >= 1960
tabulate ccodeyr if year >= 1960 & merch_x_gdp == .
save LACdata, replace

* For Eritrea
* Source: World Development Indicators (WDI) database of the World Bank
* Last accessed: 02-02-2021
clear
import excel "Data_Extract_From_WDI_Database_Archives_(beta) (10).xlsx", sheet("Data") firstrow
* One row per country, then one row per country-year
collapse (mean) y*, by(ccode)
drop if ccode == ""
reshape long y, i(ccode) j(year)
ren y sh
sort ccode year
* Lags 5..1 and leads 1..5, created in this order so that the range
* lag`X'sh-lead`X'sh is contiguous after -order-
foreach X in 5 4 3 2 1 {
bysort ccode: gen lag`X'sh = sh[_n-`X']
}
foreach X in 1 2 3 4 5 {
bysort ccode: gen lead`X'sh = sh[_n+`X']
}
order ccode year lag* sh* lead*
* Zero-window series; named eri_ma0 (it was som_ma0, a leftover from the Somalia
* section) so that it is included in the correlation check of eri_ma* below
gen eri_ma0 = sh
foreach X in 1 2 3 4 5 {
egen eri_ma`X' = rmean(sh lag`X'sh-lead`X'sh)
}
corr eri_ma*
* Keep the 5-lead/lag average, only for years ending in 0 or 5
keep ccode year eri_ma5
gen lastyear = substr(string(year),4,1)
destring lastyear, replace
keep if lastyear == 5 | lastyear == 0
keep ccode year eri_ma5
ren eri_ma5 merch_x_gdp_eri
sort ccode year
save merch_x_gdp_eri, replace
* Overwrite the Eritrea series in the main data
use LACdata, clear
sort ccode year
merge ccode year using merch_x_gdp_eri
tab _m
drop _m
sum merch_x_gdp_eri year if ccode == "ERI"
* All Eritrea observations take the WDI-based value ...
replace merch_x_gdp = merch_x_gdp_eri if ccode == "ERI"
* ... and any Eritrea value that is still missing is set to a fixed number
replace merch_x_gdp = 23.44808 if ccode == "ERI" & merch_x_gdp == .
drop merch_x_gdp_eri
codebook merch_x_gdp if year >= 2016
* NOTE(review): mfg_sh_gdp was computed before the Somalia/Eritrea corrections to
* merch_x_gdp and is not recomputed here -- confirm this is intended.
label var mfg_sh_gdp "Share of manufacturing exports in GDP (%) in t (MA2)"
save LACdata, replace

********************************
*** OTHER VARIABLES FROM GJV ***
********************************

* "gjv" is the main data set from the replication files of:
* Source: Douglas Gollin & Remi Jedwab & Dietrich Vollrath, 2016. "Urbanization with and without industrialization," Journal of Economic Growth, Springer, vol. 21(1), pages 35-70, March.
* File available in the replication files on the website of the journal. 
* Time-invariant country characteristics, taken from the 2010 cross-section of gjv
use gjv, clear
keep if year == 2010 
* The Democratic Republic of the Congo is recoded from ZAR to COD
replace ccode = "COD" if ccode == "ZAR"
* Log of land area and its square
generate larea = log(area)
generate larea_sq = larea*larea
label var larea "Log of land area"
label var larea_sq "Log of land area - squared"
keep ccode area larea* smallisland landlocked admin-type country
* Log of (1 + population threshold)
generate lthreshold_level = log(threshold_level+1)
label var lthreshold_level "Log of the population threshold"
rename country country_gjv
sort ccode 
save othervars, replace

* Attach these variables to every year of the main data set
use LACdata, clear 
sort ccode
merge ccode using othervars
tabulate _merge
drop _merge
save LACdata, replace

* List of the 116 developing economies used for the main econometric analysis.
* It is needed below and for the mapping analysis.
keep country_wb 
duplicates drop
sort country_wb
save list116G, replace

***************************************************
*** COMBINING CASH CROP AND MINING-FUEL EXPORTS ***
***************************************************

* We now combine the cash crop exports and mining-fuel exports 

***** NRX in X *****

use LACdata, clear

** NRX in x: mining/fuels + ag exports **
* "ag" stands for all agricultural exports
summarize agri_sh_x, detail 
generate nrxag_sh_x = minfuel_sh_x + agri_sh_x
summarize nrxag_sh_x, detail 
tabulate ccodeyr if nrxag_sh_x >= 105 & nrxag_sh_x != .
* The combined share is capped at 100
replace nrxag_sh_x = 100 if nrxag_sh_x >= 100 & nrxag_sh_x != .
correlate nrx*_sh_x
label var nrxag_sh_x "Share min/fuels + crops (incl all ag x) in x"

***** Share of NRX in GDP ***** 

* Checks on missing values before combining
tabulate ccodeyr if nrxag_sh_x == . & year >= 1960
codebook nrx*sh_x if year >= 1960
summarize nrx*sh_x 
codebook merch_x_gdp if year >= 1960
summarize merch_x_gdp
* (merchandise exports / GDP) x (NRX share of merchandise exports)
generate nrxag_sh_gdp = merch_x_gdp/100*nrxag_sh_x
label var nrxag_sh_gdp "Share of ag exports in GDP (%) in t"
summarize nrxag_sh_gdp

***** Individual mining-fuels and crops in GDP *****

summarize agri_sh_x, detail
* Shares in exports: minfuel_sh_x and agri_sh_x
codebook minfuel_sh_x agri_sh_x if year >= 1960 
* Same conversion to shares in GDP, component by component
foreach comp in minfuel agri {
    generate `comp'_sh_gdp = merch_x_gdp/100*`comp'_sh_x
    label var `comp'_sh_gdp "Share of `comp' exports in GDP (%) in t"
    summarize `comp'_sh_gdp
}
* Eritrea 1990: the total is rebuilt as the sum of its two components
summarize nrxag_sh_gdp if country_wb == "Eritrea" & year == 1990
replace nrxag_sh_gdp = minfuel_sh_gdp+agri_sh_gdp if country_wb == "Eritrea" & year == 1990
summarize nrxag_sh_gdp if country_wb == "Eritrea" & year == 1990
save LACdata, replace

***** Mean share of NRX in GDP (1960-2020) *****

use LACdata, clear

* Country means of the NRX share of GDP over all years up to each end year.
* The country mean is stored on every row of the country.
foreach endyr in 2000 2010 2015 2020 {
    bysort ccode: egen nrxag_mean`endyr' = mean(cond(year <= `endyr', nrxag_sh_gdp, .))
    label var nrxag_mean`endyr' "Mean share of NRX in GDP from 1960 to `endyr'"
}

***** Mean share of the other subcomponents in GDP (1960-2020) *****

foreach endyr in 2000 2010 2020 {
    foreach comp in minfuel agri {
        bysort ccode: egen `comp'_mean`endyr' = mean(cond(year <= `endyr', `comp'_sh_gdp, .))
        summarize `comp'_mean`endyr'
        label var `comp'_mean`endyr' "Mean share of `comp' in GDP from 1960 to `endyr'"
    }
}

***** Mean share of NRX in GDP (1965-2020 and 1970-2020) *****

* Same means, restricted to years from 1965 (first pass) and from 1970 (second pass)
foreach startyr in 1965 1970 {
    foreach endyr in 2010 2015 2020 {
        bysort ccode: egen nrxag_mean`startyr'`endyr' = mean(cond(inrange(year, `startyr', `endyr'), nrxag_sh_gdp, .))
        label var nrxag_mean`startyr'`endyr' "Mean share of NRX in GDP from `startyr' to `endyr'"
    }
}
save LACdata, replace

***** NRX vs non-NRX countries ******

* Various definitions of resource-rich countries, all based on the 2020 mean NRX share of GDP
use LACdata, clear
codebook nrx*_mean*
* One dummy per cut-off. The 10% dummy is called nrxag_yn_2020; the others carry
* the cut-off without its decimal point in the name (5yn, 7yn, 75yn, 8yn).
foreach cutoff in 10 5 7 7.5 8 {
    local tag = cond("`cutoff'" == "10", "", subinstr("`cutoff'", ".", "", .))
    generate nrxag_`tag'yn_2020 = (nrxag_mean2020 >= `cutoff')
    label var nrxag_`tag'yn_2020 "Resource-rich if mean share of NRX in GDP is above `cutoff'"
}
save LACdata, replace

*********************************************
*** SHARE MANUFACTURING & SERVICES IN GDP ***
*********************************************

*** Manufacturing ***

* Source: WDI database of the World Bank (beta version)
* Last accessed: 01-31-2021
* This file is very large as it is the beta version. We create a simplified temp_mfg 
*clear
*import excel "Data_Extract_From_WDI_Database_Archives_(beta) (4).xlsx", sheet("Data") firstrow clear
*tab series
* Manufacturing, value added (% of GDP)
*sum y*
* We remove the 100 and values above 100.
*foreach X of numlist 1960(1)2019 {
*replace y`X' = . if y`X' >= 99.9
*} 
* we take the mean of all available measures
*collapse (mean) y*, by(country ccode)
*save temp_mfg, replace

* Moving averages of manufacturing value added (% of GDP), kept at 5-year steps
use temp_mfg, clear
drop if ccode == ""
drop country
reshape long y, i(ccode) j(year)
rename y sh
sort ccode year
* Lags 5..1 and leads 1..5, created in this order so that the range
* lag`k'sh-lead`k'sh is contiguous after -order-
forvalues k = 5(-1)1 {
    by ccode (year): generate lag`k'sh = sh[_n-`k']
}
forvalues k = 1/5 {
    by ccode (year): generate lead`k'sh = sh[_n+`k']
}
order ccode year lag* sh* lead*
* Moving averages with windows of 0 to 5 leads and lags
generate mfgsh_ma0 = sh
forvalues k = 1/5 {
    egen mfgsh_ma`k' = rmean(sh lag`k'sh-lead`k'sh)
}
correlate mfgsh_ma*
* The 2019 observation stands in for 2020
replace year = 2020 if year == 2019
* Keep years ending in 0 or 5
keep if inlist(real(substr(string(year),4,1)), 0, 5)
keep ccode year mfgsh_ma*
sort ccode year
save mfg_all, replace

*** Services (etc.) ***

* Source: WDI database of the World Bank (beta version)
* Last accessed: 01-31-2021
* This file is very large as it is the beta version. We create a simplified temp_serv 
* What we used before was "Services, etc., value added (% of GDP)"
* From the WDI's website: "Services correspond to ISIC divisions 50-99 and they include value added in wholesale and retail trade (including hotels and restaurants), transport, and government, financial, professional, and personal services such as education, health care, and real estate services. Also included are imputed bank service charges, import duties, and any statistical discrepancies noted by national compilers as well as discrepancies arising from rescaling. Value added is the net output of a sector after adding up all outputs and subtracting intermediate inputs. It is calculated without making deductions for depreciation of fabricated assets or depletion and degradation of natural resources. The industrial origin of value added is determined by the International Standard Industrial Classification (ISIC), revision 3. Note: For VAB countries, gross value added at factor cost is used as the denominator."
* we use the same variable. When the share is still unavailable, we use the other variable "Services, value added (% of GDP)(NV.SRV.TOTL.ZS)" - On the WDI's website, note that no details provided on what is exactly included here. 
*clear
*import excel "Data_Extract_From_WDI_Database_Archives_(beta) (5).xlsx", sheet("Data") firstrow
*tab series
* Services, etc., value added (% of GDP)
*sum y*
* We remove the 100 and values above 100.
*foreach X of numlist 1960(1)2019 {
*replace y`X' = . if y`X' >= 99.9
*}
* we take the mean of all available measures
*sum y*
*collapse (mean) y*, by(country ccode)
*save temp_serv, replace

* Moving averages of "Services, etc., value added (% of GDP)", kept at 5-year steps
use temp_serv, clear
drop if ccode == ""
reshape long y, i(ccode) j(year)
rename y sh
summarize sh, detail
drop country
sort ccode year
* Lags 5..1 and leads 1..5, created in this order so that the range
* lag`k'sh-lead`k'sh is contiguous after -order-
forvalues k = 5(-1)1 {
    by ccode (year): generate lag`k'sh = sh[_n-`k']
}
forvalues k = 1/5 {
    by ccode (year): generate lead`k'sh = sh[_n+`k']
}
order ccode year lag* sh* lead*
* Moving averages with windows of 0 to 5 leads and lags
generate servsh_ma0 = sh
forvalues k = 1/5 {
    egen servsh_ma`k' = rmean(sh lag`k'sh-lead`k'sh)
}
correlate servsh_ma*
* The 2019 observation stands in for 2020
replace year = 2020 if year == 2019
* Keep years ending in 0 or 5
keep if inlist(real(substr(string(year),4,1)), 0, 5)
keep ccode year servsh_ma*
sort ccode year
save serv_all, replace

*** Services (no details provided) ***

* Source: WDI database of the World Bank (beta version)
* Last accessed: 01-31-2021
* This file is very large as it is the beta version. We create a simplified temp_serv2 
* When the previous variable is unavailable, we use "Services, value added (% of GDP)(NV.SRV.TOTL.ZS)".
*clear
*import excel "Data_Extract_From_WDI_Database_Archives_(beta) (6).xlsx", sheet("Data") firstrow
*tab series
* Services, value added (% of GDP)
*sum y*
* We remove the 100 and values above 100.
*foreach X of numlist 1960(1)2019 {
*replace y`X' = . if y`X' >= 99.9
*}
* we take the mean of all available measures
*collapse (mean) y*, by(country ccode)
*save temp_serv2, replace
* Moving averages of "Services, value added (% of GDP)", kept at 5-year steps
use temp_serv2, clear
drop if ccode == ""
drop country
reshape long y, i(ccode) j(year)
rename y sh
sort ccode year
* Lags 5..1 and leads 1..5, created in this order so that the range
* lag`k'sh-lead`k'sh is contiguous after -order-
forvalues k = 5(-1)1 {
    by ccode (year): generate lag`k'sh = sh[_n-`k']
}
forvalues k = 1/5 {
    by ccode (year): generate lead`k'sh = sh[_n+`k']
}
order ccode year lag* sh* lead*
* Moving averages with windows of 0 to 5 leads and lags
generate serv2sh_ma0 = sh
forvalues k = 1/5 {
    egen serv2sh_ma`k' = rmean(sh lag`k'sh-lead`k'sh)
}
correlate serv2sh_ma*
* The 2019 observation stands in for 2020
replace year = 2020 if year == 2019
* Keep years ending in 0 or 5
keep if inlist(real(substr(string(year),4,1)), 0, 5)
keep ccode year serv2sh_ma*
sort ccode year
save serv2_all, replace

*** We combine all of these *** 

* Put the two services series side by side
use serv_all, clear
sort ccode year
merge ccode year using serv2_all
tabulate _merge
drop _merge
* These are different moving averages with various numbers of leads/lags, since data are patchy for some countries.
* "Services, etc., value added (% of GDP)" is the main variable: wherever it is
* available it overwrites serv2, so serv2 ends up as "main series, complemented
* with Services, value added (% of GDP) when needed".
forvalues k = 0/5 {
    replace serv2sh_ma`k' = servsh_ma`k' if servsh_ma`k' != .
}
codebook serv*sh_ma0
* There are fewer missings with serv2 
* Add the manufacturing series
sort ccode year
merge ccode year using mfg_all
tabulate _merge
drop _merge
* Manufacturing + services, defined only when both components are available
forvalues k = 0/5 {
    generate mfgservsh_ma`k' = servsh_ma`k' + mfgsh_ma`k' if servsh_ma`k' != . & mfgsh_ma`k' != .
}
forvalues k = 0/5 {
    generate mfgserv2sh_ma`k' = serv2sh_ma`k' + mfgsh_ma`k' if serv2sh_ma`k' != . & mfgsh_ma`k' != .
}
* Only the averages with windows of 2 to 5 leads/lags are kept
keep ccode year *ma5 *ma4 *ma3 *ma2
sort ccode year
save mfgserv_all, replace

*** Adding to the main data set *** 

* Bring the sectoral GDP shares into the main data set
use LACdata, clear
sort ccode year
merge ccode year using mfgserv_all
tabulate _merge
tabulate year if _merge == 1
* Country-years that exist only in the shares file are not needed
keep if _merge != 2
drop _merge
* The analysis variables are the 5-lead/lag moving averages
generate mfgshare = mfgsh_ma5
generate servshare = servsh_ma5
generate mfgservshare = mfgshare+servshare if mfgshare != . & servshare != . 
label var mfgshare "Share of mfg in GDP in t (MA5)"
label var servshare "Share of serv in GDP in t (MA5)"
label var mfgservshare "Share of mfg+serv in GDP in t (MA5)"
* The remaining moving averages are dropped
drop servsh_ma* mfgsh_ma* mfgserv2sh_ma* serv2sh_ma* mfgservsh_ma*
sort ccode year
save LACdata, replace

****************************
*** PER CAPITA GDP (PPP) ***
****************************

***** LEVEL IN 2020 *****

* GDP per capita, PPP (constant 2017 international $) * 
* Source: WB WDI database 
* Last accessed 01-14-2021
clear
import excel "Data_Extract_From_World_Development_Indicators (19).xlsx", sheet("Data") firstrow clear
* Data from 1990 to 2019 only
* pcgdp_today_n = most recent available value, searching backwards from 2020 to 2010.
* The condition names the variable in full; the previous "pcgdp_today" only worked
* through variable-name abbreviation and breaks under -set varabbrev off- or if
* another variable starts with the same prefix.
gen pcgdp_today_n = .
foreach X of numlist 2020(-1)2010 {
replace pcgdp_today_n = y`X' if pcgdp_today_n == .
}
keep ccode *_n
sort ccode 
save pcgdp_today, replace

* The latest pcGDP level is added to every year of the main data
use LACdata, clear
sort ccode
merge ccode using pcgdp_today
tabulate _merge
tabulate country_wb if _merge == 1
* Taiwan * 
keep if _merge != 2
drop _merge
* Taiwan is close to Sweden in 2019
* We use information from Wikipedia 
* Source: https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(PPP)_per_capita
* Sweden 55,820
* Taiwan 55,078
* Taiwan is set to 53239.82 scaled by the Taiwan/Sweden ratio from that list
replace pcgdp_today_n = 53239.82*(55078/55820) if country_wb == "Taiwan"
summarize pcgdp_today_n if country_wb == "Taiwan"
codebook pcgdp_today_n
tabulate country_wb if pcgdp_today_n == .
* Final names for the level and its log
rename pcgdp_today_n pcgdp_today_latestwdi
generate lpcgdp_today_latestwdi = log(pcgdp_today_latestwdi)
save LACdata, replace
describe *pcgdp*

* Given the pcGDP data is missing before 1990, we rely on the Maddison database
* However, the database stops in 2010
* We thus combine the Maddison (1960-2010) and WDI (2010-2020) information

*** WDI GROWTH RATES 2010-2020 ***

* From WDI. Based on GDP per capita, PPP (constant 2017 international $) *
* Source: WB WDI database 
* Last accessed 01-14-2021
clear
import excel "Data_Extract_From_World_Development_Indicators (19).xlsx", sheet("Data") firstrow clear
drop if ccode == ""
reshape long y, i(ccode) j(year) string
destring year, replace
* Annual growth rate in percent, computed within country. Without the by-group the
* first year of each country would be compared with the last year of the previous
* country; with it, that first growth rate is missing.
bysort ccode (year): gen gr = (y/y[_n-1]-1)*100
* growth rates from 2016 (2015-2016) to 2020 (2019-2020)
gen period = 2020 if year >= 2016 & year <= 2020
* growth rates from 2011 (2010-2011) to 2015 (2014-2015)
replace period = 2015 if year >= 2011 & year <= 2015
drop if period == .
* Mean annual growth rate per country and 5-year period; the period's end year becomes "year"
collapse (mean) gr, by(ccode period)
ren period year
sort ccode year
save gr, replace

*** MADDISON GDP DATA 1960-2010 ***

* Modified Maddison data *
* Obtained from Source: Douglas Gollin & Remi Jedwab & Dietrich Vollrath, 2016. "Urbanization with and without industrialization," Journal of Economic Growth, Springer, vol. 21(1), pages 35-70, March.
import excel "maddison2.xlsx", sheet("Sheet1") firstrow clear
* From one column per year to one row per country-year
reshape long y, i(country) j(year)
rename y pcgdpm
label var pcgdpm "GDP per capita, PPP (Geary-Khamis 1990) - Maddison"
* Unaccented spelling, used as the merge key below
replace country = "Cote d'Ivoire" if country == "Côte d'Ivoire"
sort country year
save maddison, replace

* List of 116 countries: the distinct country names found in urban_sample *
use urban_sample, clear
keep country
duplicates drop
sort country 
save list116, replace 

* We also rely on older WDI data 
* The file "wdi_new_remi" was created by Remi Jedwab for the work on "Urbanization with and without industrialization"
* It has "GDP per capita, PPP (constant 2005 international $) - WDI"
* This file is not available anymore on the website of the World Bank
* Last accessed: 07-02-2013
* Purpose of this section: build pcgdp_new, a country-year series of pcGDP that uses
* the WDI values where available and back-casts missing years with Maddison growth rates.
clear
import excel "wdi_new_remi.xls", sheet("Sheet1") firstrow clear
reshape long y, i(country) j(year)
* Country names are harmonised so that they match the names in list116
replace country = "Bolivia (Plurinational State of)" if country == "Bolivia"
replace country = "Congo" if country == "Congo, Rep."
*replace country = "Côte d'Ivoire" if country == "Cote d'Ivoire"
replace country = "Dem. People's Republic of Korea" if country == "Korea, Dem. Rep."
replace country = "Democratic Republic of the Congo" if country == "Congo, Dem. Rep."
replace country = "Dominican Republic" if country == "Dominican Rep"
replace country = "Iran (Islamic Republic of)" if country == "Iran, Islamic Rep."
replace country = "Lao People's Democratic Republic" if country == "Lao PDR"
replace country = "Myanmar" if country == "Burma"
replace country = "Republic of Korea" if country == "Korea, Rep."
replace country = "Syrian Arab Republic" if country == "Syria"
replace country = "Trinidad and Tobago" if country == "Trinidad"
replace country = "United Arab Emirates" if country == "UAE"
replace country = "United Republic of Tanzania" if country == "Tanzania"
replace country = "Venezuela (Bolivarian Republic of)" if country == "Venezuela, RB"
replace country = "Viet Nam" if country == "Vietnam"
replace country = "Yemen" if country == "Yemen, Rep."
replace country = "Bahamas" if country == "Bahamas, The"
replace country = "China, Hong Kong SAR" if country == "Hong Kong SAR, China"
replace country = "China, Macao SAR" if country == "Macao SAR, China"
replace country = "Egypt" if country == "Egypt, Arab Rep."
replace country = "Gambia" if country == "Gambia, The"
ren y pcgdp
sort country
merge country using list116
tab _m
tab country if _m == 2
* We only need the 116 countries in the list
drop if _m == 1
drop _m
codebook country
* 116
* NOTE(review): BC BD BE BF country2 look like leftover spreadsheet columns -- confirm
drop BC BD BE BF country2
sort country year 
codebook pcgdp 
label var pcgdp "GDP per capita, PPP (constant 2005 international $) - WDI"
* Many missing in WDI (GDP per capita, PPP (constant 2005 international $))
sort country year
merge country year using maddison
tab _m
tab country if _m == 1 
tab country if _m == 1 & pcgdp == .
* Countries not in Maddison
* But these countries already have pcgdp data
tab country if _m == 2
drop if _m == 2
drop _m
* Years are put in descending order within country, so [_n-1] below is the next later year
gsort country -year
* We use WDI as the baseline
* We use Maddison to obtain past growth rates, thus adjusting the WDI with Maddison
* growthratem = Maddison pcGDP of the later year divided by that of the current year
bysort country: gen growthratem = pcgdpm[_n-1]/pcgdpm[_n] 
* A missing WDI value is set to the later year's value divided by that ratio.
* The replace works through the observations in order, so a value filled in for
* one year is available when the next earlier year is processed.
bysort country: replace pcgdp  = pcgdp[_n-1]/growthratem if pcgdp == .
codebook pcgdp
* No missing for the period 1960-2010 (data used in GJV)
keep country year pcgdp
ren country country_gjv
sort country_gjv year
save pcgdp_new, replace

* Attach the reconstructed pcGDP series (pcgdp_new) to the main data, then
* extend it to 2015 and 2020 with growth rates from the file gr.
use LACdata, clear
replace country_gjv = "Cote d'Ivoire" if country_wup == "Côte d'Ivoire"
sort country_gjv year
* NOTE(review): if no variable is named exactly pcgdp at this point, variable-name
* abbreviation could make this drop pcgdp_today_latestwdi instead -- confirm.
drop pcgdp
merge country_gjv year using pcgdp_new
tab _m if year >= 1960 & year <= 2010
drop if _m == 2
drop _m
codebook pcgdp if year >= 1960 & year <= 2010
* No missings
codebook pcgdp if year >= 1960 & year <= 2020
sort ccode year
merge ccode year using gr
tab _m if year >= 2015 & year <= 2020
tab ccode if _m == 1 & year >= 2015 & year <= 2020
drop if _m == 2 
drop _m
* Missings:
codebook gr if year >= 2015 & year <= 2020
tab ccodeyr if year >= 2015 & year <= 2020 & gr == .
* For these, we obtain the growth rates from other sources 
* Cuba = source: Constant GDP per capita for Cuba, 2010 U.S. Dollars from WDI as reported by FRED
replace gr = 1.026300213 if ccodeyr == "CUB 2015"
replace gr = 1.014841872 if ccodeyr == "CUB 2020"
* Eritrea =  source: IMF WEO. Gross domestic product per capita, current prices PURCHASING POWER PARITY; INTERNATIONAL DOLLARS. As of 02-05-2021
replace gr = 6.30 if ccodeyr == "ERI 2015"
replace gr = 3.11 if ccodeyr == "ERI 2020"
* Somalia = source: CIA World Factbook
replace gr = -9.639799639 if ccodeyr == "SOM 2015"
replace gr = 0 if ccodeyr == "SOM 2020"
* Syria = source: CIA World Factbook
replace gr = 2.867965368 if ccodeyr == "SYR 2015"
replace gr = 0 if ccodeyr == "SYR 2020"
** Taiwan = source: Knoema 
replace gr = 4.089641534 if ccodeyr == "TWN 2015"
replace gr = 2.706325898 if ccodeyr == "TWN 2020"
** Venezuela = source: World Bank, International Comparison Program database.
replace gr = -0.935245178 if ccodeyr == "VEN 2015"
replace gr = -16.30628181 if ccodeyr == "VEN 2020"
** Yemen = source: World Bank, International Comparison Program database.
replace gr = -7.015823651 if ccodeyr == "YEM 2015"
replace gr = -9.64000604 if ccodeyr == "YEM 2020"
* with the annual growth rates, we reconstruct the full series
sum gr, d
* We now convert to 5-year changes:
* percent rate -> annual growth factor -> factor compounded over 5 years
replace gr = (1+gr/100)
sum gr, d
replace gr = gr^5
sum gr, d
* We reconstruct for 2015
* (value of the previous observation times the 5-year factor; this relies on the
* current sort order placing the previous period of the same country just above)
codebook gr if year == 2015
replace pcgdp = pcgdp[_n-1]*gr if year == 2015 
sum pcgdp if year == 2015
sum pcgdp if year == 2010
* We reconstruct for 2020
codebook gr if year == 2020
replace pcgdp = pcgdp[_n-1]*gr if year == 2020
sum pcgdp if year == 2020
sum pcgdp if year == 2015
codebook pcgdp
desc pcgdp*
* The closing quote of the label was missing
label var pcgdp "pcGDP PPP (cst 2005 intl $) WDI as base 6010, Maddison before, WDI 2017$ 1020)"
drop gr
* lpcgdp is only generated further down in this file. Labelling it here unguarded
* would fail or, through variable-name abbreviation, relabel lpcgdp_today_latestwdi.
* The label is therefore applied only if a variable named exactly lpcgdp exists.
capture confirm variable lpcgdp, exact
if _rc == 0 {
    label var lpcgdp "Log pcGDP PPP (cst 2005 intl $) WDI as base 6010, Maddison before, WDI 2017$ 1020)"
}
sort ccode year
save LACdata, replace

*** Mean 1960-2010 and 1960-2020 ***

* Country-level means of per capita GDP over several windows.
* Every mean is stored on all rows of the country.
use LACdata, clear
codebook pcgdp if year >= 1960
* No missing 
* Mean of pcgdp from 1960 to 2010/2020, and the log of that mean
foreach X in 2010 2020 {
gen pcgdp_`X' = pcgdp if year >= 1960 & year <= `X'
bysort ccode: egen pcgdpmean1960`X' = mean(pcgdp_`X')
label var pcgdpmean1960`X' "Mean pcgdp 1960-`X'"
drop pcgdp_`X'
gen lpcgdpmean1960`X' = log(pcgdpmean1960`X')
label var lpcgdpmean1960`X' "Log of mean pcgdp 1960-`X'"
}
codebook lpcgdpmean1960*
* Same from 1970 onwards; the labels now say 1970 (they previously said 1960)
foreach X in 2010 2020 {
gen pcgdp_`X' = pcgdp if year >= 1970 & year <= `X'
bysort ccode: egen pcgdpmean1970`X' = mean(pcgdp_`X')
label var pcgdpmean1970`X' "Mean pcgdp 1970-`X'"
drop pcgdp_`X'
gen lpcgdpmean1970`X' = log(pcgdpmean1970`X')
label var lpcgdpmean1970`X' "Log of Mean pcgdp 1970-`X'"
}
codebook lpcgdpmean1970*
* Log of per capita GDP in each year
gen lpcgdp = log(pcgdp)
label var lpcgdp "Log of per capita GDP"
* Mean of the yearly logs (as opposed to the log of the mean above), from 1960
foreach X in 2000 2010 2020 {
gen lpcgdp_`X' = log(pcgdp) if year >= 1960 & year <= `X'
bysort ccode: egen meanlpcgdp1960`X' = mean(lpcgdp_`X')
drop lpcgdp_`X'
label var meanlpcgdp1960`X' "Mean of log mean pcgdp 1960-`X'"
}
* Mean of the yearly logs, from 1970
foreach X in 2000 2010 2020 {
gen lpcgdp_`X' = log(pcgdp) if year >= 1970 & year <= `X'
bysort ccode: egen meanlpcgdp1970`X' = mean(lpcgdp_`X')
drop lpcgdp_`X'
label var meanlpcgdp1970`X' "Mean of log mean pcgdp 1970-`X'"
}
sort ccode year
save LACdata, replace

***************************************************
*** MANUFACTURING AND SERVICES IN GDP 2010-2020 ***
***************************************************

use LACdata, clear
* Countries with a missing combined share in each benchmark year *
foreach Y of numlist 2010(5)2020 {
    tabulate ccode if mfgservshare == . & year == `Y'
}
* Missing values are filled by hand, country by country, from the sources cited next to each value.
** Djibouti **
tabulate year mfgshare if ccode == "DJI" & inrange(year, 2010, 2020)
tabulate year servshare if ccode == "DJI" & inrange(year, 2010, 2020)
* Services = 80.2% (2017). Source: CIA (2021): https://www.cia.gov/the-world-factbook/countries/djibouti/#economy
replace servshare = 80.2 if ccode == "DJI" & inrange(year, 2015, 2020)
replace mfgservshare = mfgshare + servshare if ccode == "DJI" & inrange(year, 2015, 2020)
** Eritrea **
tabulate year mfgshare if ccode == "ERI" & inrange(year, 2010, 2020)
tabulate year servshare if ccode == "ERI" & inrange(year, 2010, 2020)
* Missing 2015 2020
* Services = 58.7% (2017). Source: CIA (2021): https://www.cia.gov/the-world-factbook/countries/eritrea/#economy
replace servshare = 58.7 if ccode == "ERI" & inrange(year, 2015, 2020)
* Manufacturing = 5.9 (2016). Source: African Economic Outlook (2021): https://www.africaneconomicoutlook.org/eritrea/.
replace mfgshare = 5.9 if ccode == "ERI" & inrange(year, 2015, 2020)
replace mfgservshare = mfgshare + servshare if ccode == "ERI" & inrange(year, 2015, 2020)
** Haiti **
tabulate year mfgshare if ccode == "HTI" & inrange(year, 2010, 2020)
tabulate year servshare if ccode == "HTI" & inrange(year, 2010, 2020)
* Services = 59% (2014). Source: UNDP (2021): https://www.undp.org/content/dam/haiti/docs/mdg/UNDP-HT-HaitiRapportOMD2013_20140611.pdf
* Services = 60% (2017) . Source: Le National (2021): http://www.lenational.org/post_free.php?elif=1_CONTENUE/tribunes&rebmun=515
replace servshare = 59 if ccode == "HTI" & year == 2010
replace servshare = 60 if ccode == "HTI" & inrange(year, 2015, 2020)
replace mfgservshare = mfgshare + servshare if ccode == "HTI" & inrange(year, 2010, 2020)
** Libya **
tabulate year mfgshare if ccode == "LBY" & inrange(year, 2010, 2020)
tabulate year servshare if ccode == "LBY" & inrange(year, 2010, 2020)
* Services = 46.4% (2017). Source: CIA (2021): https://www.cia.gov/the-world-factbook/countries/libya/#economy
* Manufacturing = 3.4%. Source: OECD (2017): https://www.oecd-ilibrary.org/docserver/aeo-2017-40-en.pdf?expires=1612149926&id=id&accname=guest&checksum=DD6E54DCA584E96B0A715BE5CE7A427E 
replace mfgshare = 3.4 if ccode == "LBY" & inrange(year, 2015, 2020)
replace servshare = 46.4 if ccode == "LBY" & inrange(year, 2015, 2020)
replace mfgservshare = mfgshare + servshare if ccode == "LBY" & inrange(year, 2015, 2020)
** Solomon Islands **
tabulate year mfgshare if ccode == "SLB" & inrange(year, 2010, 2020)
tabulate year servshare if ccode == "SLB" & inrange(year, 2010, 2020)
* Services = 58.1% (2017). Source: CIA (2021): https://www.cia.gov/the-world-factbook/countries/solomon-islands/
* Manufacturing = 8.0% (2016). Source: Solomon Chamber (2017): http://www.solomonchamber.com.sb/media/1829/gdp-publication-2003-2017-23032020.pdf
replace mfgshare = 8.0 if ccode == "SLB" & inrange(year, 2015, 2020)
replace servshare = 58.1 if ccode == "SLB" & inrange(year, 2015, 2020)
replace mfgservshare = mfgshare + servshare if ccode == "SLB" & inrange(year, 2015, 2020)
** Syria **
tabulate year mfgshare if ccode == "SYR" & inrange(year, 2010, 2020)
tabulate year servshare if ccode == "SYR" & inrange(year, 2010, 2020)
* Services = 60.8% (2017). Source: CIA (2021): https://www.cia.gov/the-world-factbook/countries/syria/#economy
* Manufacturing = cannot find it. Assume same as in 2010.
replace mfgshare = 10.90527 if ccode == "SYR" & inrange(year, 2015, 2020)
replace servshare = 60.8 if ccode == "SYR" & inrange(year, 2015, 2020)
replace mfgservshare = mfgshare + servshare if ccode == "SYR" & inrange(year, 2015, 2020)
** Taiwan **
tabulate year mfgshare if ccode == "TWN" & inrange(year, 2010, 2020)
tabulate year servshare if ccode == "TWN" & inrange(year, 2010, 2020)
* Manufacturing = Statista (2020): https://www.statista.com/statistics/706395/taiwan-gdp-share-across-industry-sectors/#:~:text=In%202019%2C%20the%20manufacturing%20sector,originated%20from%20the%20construction%20industry.
replace mfgshare = 28.6 if ccode == "TWN" & year == 2010
replace mfgshare = 31.36 if ccode == "TWN" & year == 2015
replace mfgshare = 31.27 if ccode == "TWN" & year == 2020
* Services = 68.8% (2011). Source: COUNCIL FOR ECONOMIC PLANNING AND DEVELOPMENT EXECUTIVE YUAN, R.O.C. (TAIWAN) (2021): https://ws.ndc.gov.tw/Download.ashx?u=LzAwMS9hZG1pbmlzdHJhdG9yLzEwL3JlbGZpbGUvNTYwNy83MzIvMDAxNzUwOV8xLnBkZg%3D%3D&n=MjAxMl%2FntpPlu7rmnINf6Ie654Gj55m85bGV6Iux5paH55Wr5YaKX%2BeAj%2BimvS5wZGY%3D&icon=..pdf
* Services = 62.1% (2017). Source: CIA (2021). https://www.cia.gov/the-world-factbook/countries/taiwan/#economy
replace servshare = 68.8 if ccode == "TWN" & year == 2010
replace servshare = 62.1 if ccode == "TWN" & inrange(year, 2015, 2020)
replace mfgservshare = mfgshare + servshare if ccode == "TWN" & inrange(year, 2010, 2020)
** Somalia **
tabulate country_wb if ccode == "SOM"
tabulate year mfgshare if ccode == "SOM" & inrange(year, 2010, 2020)
tabulate year servshare if ccode == "SOM" & inrange(year, 2010, 2020)
* What we had before in GJV * 
*tab mfg_gdp2010 if ccode == "SOM" & year == 2010
* 7.2 - what we had in GJV
*tab serv_gdp2010 if ccode == "SOM" & year == 2010
* 33.5 - what we had in GJV
* Services = 32.5% (2013). Source: CIA (2021): https://www.cia.gov/the-world-factbook/countries/somalia/#economy
* Manufacturing = 2.5% (2014). Source: Organisation of Islamic Cooperation (2016): https://www.sesric.org/files/article/714.pdf
* We use what we had in GJV
replace mfgshare = 7.2 if ccode == "SOM" & year == 2010
replace servshare = 33.5 if ccode == "SOM" & year == 2010
replace mfgshare = 7.2 if ccode == "SOM" & inrange(year, 2015, 2020)
replace servshare = 33.5 if ccode == "SOM" & inrange(year, 2015, 2020)
replace mfgservshare = mfgshare + servshare if ccode == "SOM" & inrange(year, 2010, 2020)
save LACdata, replace
* no missings in 2010-2020
codebook mfgshare servshare mfgservshare if year >= 2010
codebook mfgshare servshare mfgservshare if year >= 1960

***************************************************
*** MANUFACTURING AND SERVICES IN GDP 1960-1970 ***
***************************************************

*** Missings in 1960-1970 ***
use LACdata, clear
* Data set for Federico * 
tab ccode if mfgservshare == . & year == 1970
tab country_wb if mfgservshare == . & year == 1960
* 49
tab country_wb if mfgservshare == . & year == 1965
* 39
tab country_wb if mfgservshare == . & year == 1970
* 32
* Template of the country-years to look up: countries with at least one
* missing manufacturing or service share in 1960-1975. The template is only
* built and counted here; it is cleared below without being saved.
keep if year >= 1960 & year <= 1975
keep country_wb ccode year mfgshare servshare
order country_wb ccode year mfgshare servshare 
gen mfgmiss = 1 if mfgshare == .
gen servmiss = 1 if servshare == .
bysort ccode: egen mfgmiss_yn = max(mfgmiss)
bysort ccode: egen servmiss_yn = max(servmiss)
keep if mfgmiss_yn == 1 | servmiss_yn == 1
keep country_wb ccode year mfgshare servshare
order country_wb ccode year mfgshare servshare
gen mfgshare_f = .
gen servshare_f = .
gen yearused = ""
gen source = ""
count
* 196
* We find them using other sources
* We report the values we find in this excel file
* The sources are indicated in the last column
clear
import excel "mfg_serv_missing_1960_1970_Federico_02032021_Remi.xls", sheet("Sheet1") firstrow clear
keep ccode year *newobs
sort ccode year
save mfg_serv_missing_1960_1970, replace 
count
* 196
* Adding to the main data set 
* (old-style merge syntax: both files are sorted by ccode year beforehand)
use LACdata, clear
sort ccode year
merge ccode year using mfg_serv_missing_1960_1970
tab _m
drop _m
* We use these new variables if the share is missing
replace mfgshare = mfgshare_newobs if mfgshare == .
replace servshare = servshare_newobs if servshare == .
replace mfgservshare = mfgshare+servshare if mfgservshare == .
* We recreate the values circa 1960 using first 1960 when available, then 1965, then 1970 if still not available
* Step 1: spread each benchmark-year value to all rows of the country
foreach X in 1960 1965 1970 1975 {
    foreach Z in mfg serv {
        gen `Z'share_`X' = `Z'share if year == `X'
        bysort ccode: egen `Z'share`X' = max(`Z'share_`X')
        drop `Z'share_`X'
    }
}
* 1960
foreach Z in mfg serv {
    gen `Z'_ca1960 = .
    foreach X in 1960 1965 1970 {
        replace `Z'_ca1960 = `Z'share`X' if `Z'_ca1960 == .
    }
}
* 1970 (fallback order: 1970, then 1975, then 1965)
* The 1965, 1970 and 1975 helper columns are dropped as they are used; the
* 1960 columns are kept and labelled below.
foreach Z in mfg serv {
    gen `Z'_ca1970 = .
    foreach X in 1970 1975 1965 {
        replace `Z'_ca1970 = `Z'share`X' if `Z'_ca1970 == .
        drop `Z'share`X'
    }
}
gen mfgserv_ca1960 = mfg_ca1960+serv_ca1960
gen mfgserv_ca1970 = mfg_ca1970+serv_ca1970
codebook mfgserv_ca1960 if year == 1960
* only missing Eritrea
tab ccode if mfgserv_ca1960 == .
* We won't be able to add for 1960
drop *newobs 
label var mfgshare1960 "Manufacturing share of GDP in 1960"
label var servshare1960 "Service share of GDP in 1960"
label var mfg_ca1960 "Manufacturing share of GDP circa 1960"
* The serv_ca* labels previously read "Manufacturing share ..."; corrected.
label var serv_ca1960 "Service share of GDP circa 1960"
label var mfg_ca1970 "Manufacturing share of GDP circa 1970"
label var serv_ca1970 "Service share of GDP circa 1970"
label var mfgserv_ca1960 "Manufacturing+service share of GDP circa 1960"
label var mfgserv_ca1970 "Manufacturing+service share of GDP circa 1970"
save LACdata, replace 

*********************************************
*** MANUFACTURING, FIRE, AND IC GDP SHARE ***
*********************************************

* Source: UN-SNA
* Url: https://unstats.un.org/unsd/nationalaccount/sna.asp
* Last accessed: 03-09-2021

* Created in the folder "UNSNA" * 
* See the do-file "fire_gdp_FINAL"
*** ISIC3 AND ISIC4 FROM UN-SNA ***
* Trimmed copy of the UN-SNA file: merge keys plus the variables ending in _un
use gdpsh_unsna, clear
keep country_gjv year *_un
sort country_gjv year
save gdpsh_unsna2, replace
* We add to the main data set
use LACdata, clear
* The accented World Bank spelling is mapped to the unaccented one in the
* merge key (presumably the spelling used in the UN-SNA file - confirm).
replace country_gjv = "Cote d'Ivoire" if country_wb == "Côte d'Ivoire"
* Old-style merge syntax: both files are sorted by the key beforehand
sort country_gjv year
merge country_gjv year using gdpsh_unsna2
tab _m
drop _m
save LACdata, replace

* Country-years with FIRE data
* (diagnostic only: number of non-missing gdpsh_fire_ma5_un rows per year)
use gdpsh_unsna, clear
bysort year: count if gdpsh_fire_ma5_un != .

* See the do-file "fire_gdp_FINAL"
* Created in the folder "UNSNA" * 
*** BROAD CATEGORIES FROM UN-SNAAMA ***
* The input file is re-saved in place, sorted by the merge key
use gdpsh_unsnaama, clear
sort country_gjv year
save gdpsh_unsnaama, replace
* We add to the main data set
use LACdata, clear
sort country_gjv year
merge country_gjv year using gdpsh_unsnaama
tab _m
drop _m
codebook gdpsh*_*ama* if year == 2020
* Combined manufacturing + services share; missing if either part is missing
gen gdpsh_mfgserv_snaama_ma2 = gdpsh_mfg_snaama_ma2+gdpsh_serv_snaama_ma2
save LACdata, replace

*** COMPLETING THE MISSING SHARES WITH UN-SNAAMA AND COMPARING ***
* Fills the remaining gaps in mfgshare and servshare from 1960 onwards with
* hand-entered values, within-country linear interpolation or the UN-SNAAMA
* series, region by region, and then rebuilds mfgservshare. The statements
* are order-dependent: later fills only touch values that are still missing.
use LACdata, clear
keep if year >= 1960
tab region, m
* For Taiwan and the interpolation, we need to add a few observations 
* NOTE(review): the next two lines both target "TWN 1980", so the value 36 is
* immediately overwritten by 45*0.92. The source list below cites 1980, 1986
* and 2000, so the second line may have been meant for another year - confirm.
replace mfgshare = 36 if ccodeyr == "TWN 1980"
replace mfgshare = 45*0.92 if ccodeyr == "TWN 1980"
replace mfgshare = 26.4 if ccodeyr == "TWN 2000"
* We use the following sources: 
* source 1980 = https://www.soas.ac.uk/taiwanstudies/eats/eats2005/file23991.pdf
* source 1986 = https://web.archive.org/web/20100222091017/http://www.taiwan.com.au/Polieco/Industry/Major/report01.html
* source 2000 = https://www.nationsencyclopedia.com/Asia-and-Oceania/Taiwan-INDUSTRY.html
*twoway (scatter servshare year) if ccode == "TWN"
replace servshare = 46.6 if ccodeyr == "TWN 1980"
replace servshare = 54.6 if ccodeyr == "TWN 1990"
replace servshare = 62.3 if ccodeyr == "TWN 2000"
* source all yrs = https://library.fes.de/libalt/journals/swetsfulltext/11241653.pdf
* For these, we use the document values_sources in the folder.
replace servshare = 23 if ccodeyr == "BLZ 1960"
replace servshare = 24 if ccodeyr == "BLZ 1965"
replace mfgshare = 9 if ccodeyr == "IRQ 1960"
replace servshare = 29 if ccodeyr == "IRQ 1960"
* Interpolated versions *
* (within-country linear interpolation over year; ipolate without the epolate
* option does not extrapolate beyond the first and last observed values)
sort ccode year
bysort ccode: ipolate mfgshare year, gen(mfgshare_i)
bysort ccode: ipolate servshare year, gen(servshare_i)
* We check which ones are still missing. 
* We check region by region 
*** COMPARING FIRST, REGION BY REGION ***
** LAC **
* MFG
*twoway (scatter mfgshare year) (connected gdpsh_mfg_snaama_ma2 year) if region == "Latin America & Caribbean", legend(order(1 "Ourdata" 2 "SNA-AMA")) by(ccode) xlabel(1960(20)2020) xtitle(, size(zero))
* SERV
*twoway (scatter servshare year) (connected gdpsh_serv_snaama_ma2 year) if region == "Latin America & Caribbean", legend(order(1 "Ourdata" 2 "SNA-AMA")) by(ccode) xlabel(1960(20)2020) xtitle(, size(zero))
tab ccode year if mfgshare == . & gdpsh_mfg_snaama_ma2 != . & region == "Latin America & Caribbean" 
tab ccode year if servshare == . & gdpsh_serv_snaama_ma2 != . & region == "Latin America & Caribbean" 
* Only BHS and HTI
*twoway (connected gdpsh_mfg_snaama_ma2 year)(scatter mfgshare_i year) if ccode == "BHS" | ccode == "HTI", legend(order(2 "Ourdata" 1 "SNA-AMA")) by(ccode) xlabel(1960(10)2020) xtitle(, size(zero))
*twoway (connected gdpsh_serv_snaama_ma2 year)(scatter servshare_i year) if ccode == "BHS" | ccode == "HTI", legend(order(2 "Ourdata" 1 "SNA-AMA")) by(ccode) xlabel(1960(10)2020) xtitle(, size(zero))
replace mfgshare = mfgshare_i if mfgshare == . & (ccode == "BHS" | ccode == "HTI")
replace servshare = servshare_i if servshare == . & (ccode == "BHS" | ccode == "HTI")
* For 1960-1970, we will assume they are like 1970 if needed
tab ccodeyr if mfgshare == . & region == "Latin America & Caribbean" 
tab ccodeyr if servshare == . & region == "Latin America & Caribbean" 
*twoway (connected gdpsh_mfg_snaama_ma2 year)(scatter mfgshare_i year) if ccode == "CUB" | ccode == "BLZ", legend(order(2 "Ourdata" 1 "SNA-AMA")) by(ccode) xlabel(1960(10)2020) xtitle(, size(zero))
*twoway (connected gdpsh_serv_snaama_ma2 year)(scatter servshare_i year) if ccode == "CUB" | ccode == "BLZ", legend(order(2 "Ourdata" 1 "SNA-AMA")) by(ccode) xlabel(1960(10)2020) xtitle(, size(zero))
** SOUTH ASIA **
* MFG
*twoway (connected gdpsh_mfg_snaama_ma2 year)(scatter mfgshare year) if region == "South Asia", legend(order(2 "Ourdata" 1 "SNA-AMA")) by(ccode) xlabel(1960(20)2020) xtitle(, size(zero))
* SERV
*twoway (connected gdpsh_serv_snaama_ma2 year)(scatter servshare year) if region == "South Asia", legend(order(2 "Ourdata" 1 "SNA-AMA")) by(ccode) xlabel(1960(20)2020) xtitle(, size(zero))
tab ccode year if mfgshare == . & gdpsh_mfg_snaama_ma2 != . & region == "South Asia" 
tab ccode year if servshare == . & gdpsh_serv_snaama_ma2 != . & region == "South Asia" 
* We can replace by UN SNA-AMA, maybe better than interpolation
replace mfgshare = gdpsh_mfg_snaama_ma2 if mfgshare == . & gdpsh_mfg_snaama_ma2 != . & region == "South Asia" 
replace servshare = gdpsh_serv_snaama_ma2 if servshare == . & gdpsh_serv_snaama_ma2 != . & region == "South Asia" 
tab ccodeyr if mfgshare == . & region == "South Asia" 
tab ccodeyr if servshare == . & region == "South Asia"
** EAST ASIA **
* MFG
*twoway (connected gdpsh_mfg_snaama_ma2 year)(scatter mfgshare year) if region == "East Asia & Pacific", legend(order(2 "Ourdata" 1 "SNA-AMA")) by(ccode) xlabel(1960(20)2020) xtitle(, size(zero))
* SERV
*twoway (connected gdpsh_serv_snaama_ma2 year)(scatter servshare year) if region == "East Asia & Pacific", legend(order(2 "Ourdata" 1 "SNA-AMA")) by(ccode) xlabel(1960(20)2020) xtitle(, size(zero))
tab ccode year if mfgshare == . & gdpsh_mfg_snaama_ma2 != . & region == "East Asia & Pacific" 
tab ccode year if servshare == . & gdpsh_serv_snaama_ma2 != . & region == "East Asia & Pacific" 
* We can replace by interpolation (better than SNA-AMA for some)
* The second pair of replaces has no SNA-AMA condition, so for this region
* every remaining gap that has an interpolated value is filled.
replace mfgshare = mfgshare_i if mfgshare == . & gdpsh_mfg_snaama_ma2 != . & region == "East Asia & Pacific" 
replace servshare = servshare_i if servshare == . & gdpsh_serv_snaama_ma2 != . & region == "East Asia & Pacific" 
replace mfgshare = mfgshare_i if mfgshare == . & region == "East Asia & Pacific" 
replace servshare = servshare_i if servshare == . & region == "East Asia & Pacific" 
** MENA **
* MFG
*twoway (connected gdpsh_mfg_snaama_ma2 year)(scatter mfgshare year) if region == "Middle East & North Africa", legend(order(2 "Ourdata" 1 "SNA-AMA")) by(ccode) xlabel(1960(20)2020) xtitle(, size(zero))
* SERV
*twoway (connected gdpsh_serv_snaama_ma2 year)(scatter servshare year) if region == "Middle East & North Africa", legend(order(2 "Ourdata" 1 "SNA-AMA")) by(ccode) xlabel(1960(20)2020) xtitle(, size(zero))
tab ccode year if mfgshare == . & gdpsh_mfg_snaama_ma2 != . & region == "Middle East & North Africa" 
tab ccode year if servshare == . & gdpsh_serv_snaama_ma2 != . & region == "Middle East & North Africa" 
* We can replace by interpolation (better than SNA-AMA for some)
replace mfgshare = mfgshare_i if mfgshare == . & gdpsh_mfg_snaama_ma2 != . & region == "Middle East & North Africa" 
replace servshare = servshare_i if servshare == . & gdpsh_serv_snaama_ma2 != . & region == "Middle East & North Africa" 
** SSA **
* MFG
*twoway (connected gdpsh_mfg_snaama_ma2 year)(scatter mfgshare year) if region == "Sub-Saharan Africa", legend(order(2 "Ourdata" 1 "SNA-AMA")) by(ccode) xlabel(1960(20)2020) xtitle(, size(zero))
* SERV
*twoway (connected gdpsh_serv_snaama_ma2 year)(scatter servshare year) if region == "Sub-Saharan Africa", legend(order(2 "Ourdata" 1 "SNA-AMA")) by(ccode) xlabel(1960(20)2020) xtitle(, size(zero))
tab ccode year if mfgshare == . & gdpsh_mfg_snaama_ma2 != . & region == "Sub-Saharan Africa" 
tab ccode year if servshare == . & gdpsh_serv_snaama_ma2 != . & region == "Sub-Saharan Africa" 
* We can replace by interpolation (better than SNA-AMA for some)
replace mfgshare = mfgshare_i if mfgshare == . & gdpsh_mfg_snaama_ma2 != . & region == "Sub-Saharan Africa" 
replace servshare = servshare_i if servshare == . & gdpsh_serv_snaama_ma2 != . & region == "Sub-Saharan Africa" 
** Which ones are still missing? **
codebook mfgshare servshare if year > 1970
tab ccodeyr if mfgshare == . & year > 1970
tab ccodeyr if servshare == . & year > 1970
* OK, we won't find
codebook mfgshare servshare if year >= 1960 & year <= 1970
tab ccodeyr if mfgshare == . & year == 1970
tab ccodeyr if servshare == . & year == 1970
tab ccodeyr if mfgshare == . & year == 1960
tab ccodeyr if servshare == . & year == 1960
* Backward fill, step 1: a missing 1965 value takes the country's 1970 value
foreach X in mfg serv {
gen `X'share70 = `X'share if year == 1970
bysort ccode: egen max`X'share70 = max(`X'share70)
replace `X'share = max`X'share70 if year == 1965 & `X'share == .
}
drop *share70
* Backward fill, step 2: a missing 1960 value takes the 1965 value, which may
* itself have just been filled from 1970
foreach X in mfg serv {
gen `X'share65 = `X'share if year == 1965
bysort ccode: egen max`X'share65 = max(`X'share65)
replace `X'share = max`X'share65 if year == 1960 & `X'share == .
}
drop *share65
tab ccodeyr if mfgshare == . 
tab ccodeyr if servshare == . 
* only missing Eritrea 1960-1985
* The combined share is recomputed for every row from the completed components
replace mfgservshare = mfgshare + servshare
drop mfgshare_i servshare_i
* We label some variables
label var gdpsh_mfg_snaama_ma2 "GDP share MFG t UN-SNA MA2"
label var gdpsh_serv_snaama_ma2 "GDP share SERV t UN-SNA MA2"
label var gdpsh_mfgserv_snaama_ma2 "GDP share MFGSERV t UN-SNA MA2"
save LACdata, replace

****************************
*** ADDITIONAL VARIABLES ***
****************************

* Country-constant copies of selected variables at their 1960 and 1970 values
use LACdata, clear
* For each base year, isolate that year's value and spread it over all rows of
* the country with max(); the helper column is dropped straight away.
foreach Y in 60 70 {
    foreach V in urbrate nrxag_sh_gdp {
        generate `V'`Y' = `V' if year == 19`Y'
        bysort ccode: egen `V'19`Y' = max(`V'`Y')
        drop `V'`Y'
    }
}
label variable urbrate1960 "Urban share 1960"
label variable urbrate1970 "Urban share 1970"
label variable nrxag_sh_gdp1960 "Share NRX in GDP 1960"
label variable nrxag_sh_gdp1970 "Share NRX in GDP 1970"
save LACdata, replace

************************************
*** IPUMS INDUSTRIAL COMPOSITION ***
************************************

** Created from the IPUMS data 
* See the "Ipums" folder 

***** DISTRIBUTION OF THE YEARS IN OUR IPUMS SECTORAL DATA *****

* 2010 population by country, used as weights below
use year country_gjv pop if year == 2010 using LACdata, clear
sort country_gjv
save temp, replace

* POP-WEIGHTED MEAN *
* Data set with the sectoral composition in IPUMS  
* Created in the folder "IPUMS and other files"
use "dataset_industry_ipums_all(09.16.2021).dta", clear
count
* 250
* Harmonise the country names with the country_gjv spelling of the main file
rename country country_gjv
replace country_gjv = "Bolivia (Plurinational State of)" if country_gjv == "Bolivia"
replace country_gjv = "Egypt" if country_gjv == "Egypt, Arab Rep."
replace country_gjv = "Iran (Islamic Republic of)" if country_gjv == "Iran, Islamic Rep."
replace country_gjv = "Lao People's Democratic Republic" if country_gjv == "Lao PDR"
replace country_gjv = "United Republic of Tanzania" if country_gjv == "Tanzania"
replace country_gjv = "Venezuela (Bolivarian Republic of)" if country_gjv == "Venezuela, RB"
replace country_gjv = "Viet Nam" if country_gjv == "Vietnam"
* Attach the 2010 population (old-style merge on the sorted key); countries
* that only appear in the population file are discarded
sort country_gjv
merge country_gjv using temp
tabulate _merge
keep if _merge != 2
drop _merge
keep if inrange(year, 1990, 2020)
count
* 156
* Average sample year per country, then its population-weighted distribution
collapse (mean) year [w=pop], by(country_gjv pop)
summarize year [w=pop], detail
* mean 1999 median 2001, so "ca 2000"

***** SECTORAL DATA FROM IPUMS *****

* Builds dataset_industry_ipums_t: one row per country-year with the sectoral
* shares (agri to other_industry) rescaled to sum to 100.
* Created in the folder "IPUMS and other files"
use "dataset_industry_ipums_all(09.16.2021).dta", clear
count
* 250
sort country year
keep if type_share == "" | type_share == "bet_urbsh"
* Flag for rows whose type_share is "bet_urbsh"
gen ipu_indu_urb_imputed = (type_share == "bet_urbsh")
drop type_share year_share
* NOTE(review): if a country-year has several rows, which one survives is not
* pinned down by the sort on country and year alone.
bysort country year: keep if _n == 1
ren country country_gjv
replace country_gjv = "Bolivia (Plurinational State of)" if country_gjv == "Bolivia"
replace country_gjv = "Egypt" if country_gjv == "Egypt, Arab Rep."
replace country_gjv = "Iran (Islamic Republic of)" if country_gjv == "Iran, Islamic Rep."
replace country_gjv = "Lao People's Democratic Republic" if country_gjv == "Lao PDR"
replace country_gjv = "United Republic of Tanzania" if country_gjv == "Tanzania"
replace country_gjv = "Venezuela (Bolivarian Republic of)" if country_gjv == "Venezuela, RB"
replace country_gjv = "Viet Nam" if country_gjv == "Vietnam"
* Check: total over all categories, including supressed and unknowns
egen test = rsum(agri-unknowns)
sum test, d
drop test
sum supressed unknowns
* There are consistency issues for some country-years
drop supressed unknowns
* Rescale the remaining categories so that they sum to 100
egen test = rsum(agri-other_industry)
sum test, d
foreach X of varlist agri-other_industry {
    replace `X' = `X'/test*100
}
drop test
* Check: the total should now be 100
egen test = rsum(agri-other_industry)
sum test, d
drop test
* We check country by country
* The exact name country_gjv is used here: country was renamed above, so the
* old name only resolved through variable-name abbreviation.
order country_gjv year agri-other_industry
* Except for mining where the 0s must be close to 0, the other ones are not 0s. 
foreach X of varlist agri-other_industry {
    replace `X' = . if `X' == 0
}
sort country_gjv
save dataset_industry_ipums_t, replace
tab year

** We create the mean 1990-2020 vs. select the closest country-year to 2000 **
* For each country, keep the single sample in 1990-2020 whose year is closest
* to 2000 (ties go to the later year) and prefix the sector shares with ipuI_.
* The country identifier in this file is country_gjv; it is now referenced by
* its exact name, not by the abbreviation "country".
foreach G in t {
    use dataset_industry_ipums_`G', clear
    destring year, replace 
    keep if year >= 1990 & year <= 2020
    gen dist2000 = abs(year-2000)
    * Within country: smallest distance first, later year first among ties
    gsort +country_gjv +dist2000 -year
    order country_gjv dist2000 year 
    bysort country_gjv: keep if _n == 1
    tab year
    ren year mean_yr_indu
    foreach X of varlist agri-other_industry {
        ren `X' ipuI_`X'_`G'
    }
    save industry_ipums_19902020_`G', replace
}

** We add 1990-2020 to the main data set **
* Old-style merge on the sorted key country_gjv; the using file has one row
* per country, so its values are attached to every year of that country.
use LACdata, clear
sort country_gjv
merge country_gjv using industry_ipums_19902020_t
tabulate _merge
tabulate country_gjv if _merge == 2
* Drop the merge indicator and the bookkeeping columns of the using file
drop _merge dist2000 mean_yr_indu ipu_indu_urb_imputed
save LACdata, replace

*********************************
*** IPUMS INFORMAL EMPLOYMENT ***
*********************************

* Builds dataset_informal_ipums_t: employment-status shares per country-year,
* rescaled so that wage, self-employed, unpaid and other sum to 100.
** Created from the IPUMS data 
* See the "IPUMS and other files" folder 
clear
use "dataset_ipums_informality_all(09.22.2021).dta", clear
count
* 167
keep country year s_* 
drop s_selfemployed_cooperative s_selfemployed_sharecropper 
order country year
* All s_* variables are multiplied by 100
foreach X of varlist s_* {
replace `X' = `X'*100
}
* Check: total over the range s_wage_worker to s_other
egen test = rsum(s_wage_worker-s_other)
sum test, d
drop test
ren s_wage_worker wagewker
ren s_selfemployed selfwker
ren s_unpaidworker unpaidwker
ren s_other otherwker 
* test = total of the four main categories; it is the denominator used below,
* also for the two self-employment sub-categories
egen test = rsum(wagewker selfwker unpaidwker otherwker)
sum test, d
ren s_selfemployed_employer selfwkerboss
ren s_selfemployed_ownaccount selfwkerown
foreach X of varlist wagewker selfwker unpaidwker otherwker selfwkerboss selfwkerown {
replace `X' = `X'/test*100
}
drop test 
* Check: the four main categories should now sum to 100
egen test = rsum(wagewker selfwker unpaidwker otherwker)
sum test, d
drop test
drop s_unknownmissin
* Zeros are recoded to missing. The pattern *wker only matches names ending
* in wker, so selfwkerboss and selfwkerown are not touched.
foreach X of varlist *wker {
replace `X' = . if `X' == 0
}
* Unpaid and other workers combined; rsum counts a missing component as zero
egen unpothwker = rsum(unpaidwker otherwker)
drop otherwker
order wagewker selfwker unpaidwker unpothwker selfwkerboss selfwkerown   
sort country year
save dataset_informal_ipums_t, replace 

** Mean 1990-2020 **
* One row per country: the sample in 1990-2020 whose year is closest to 2000
* (ties go to the later year), with the status shares prefixed by ipuF_.
use dataset_informal_ipums_t, clear
destring year, replace
keep if inrange(year, 1990, 2020)
keep *wker* year country
generate dist2000 = abs(year - 2000)
gsort +country +dist2000 -year
order country dist2000 year
bysort country: keep if _n == 1
rename year mean_yr_info
foreach V of varlist *wker* {
    rename `V' ipuF_`V'_t
}
* Harmonise the country names with the country_gjv spelling of the main file
rename country country_gjv
replace country_gjv = "Bolivia (Plurinational State of)" if country_gjv == "Bolivia"
replace country_gjv = "Egypt" if country_gjv == "Egypt, Arab Rep."
replace country_gjv = "Iran (Islamic Republic of)" if country_gjv == "Iran, Islamic Rep."
replace country_gjv = "Lao People's Democratic Republic" if country_gjv == "Lao PDR"
replace country_gjv = "United Republic of Tanzania" if country_gjv == "Tanzania"
replace country_gjv = "Venezuela (Bolivarian Republic of)" if country_gjv == "Venezuela, RB"
replace country_gjv = "Viet Nam" if country_gjv == "Vietnam"
sort country_gjv
save informal_ipums_19902020_t, replace
* We merge with the main data set
* Old-style merge on the sorted key country_gjv, with the update option
use LACdata, clear
sort country_gjv
merge country_gjv using informal_ipums_19902020_t, update
tabulate _merge
tabulate country_gjv if _merge == 2
* Drop the merge indicator and the bookkeeping columns of the using file
drop _merge dist2000 mean_yr_info
label variable ipuF_unpothwker_t "Share Unpaid/Other worker"
save LACdata, replace

*************************************************
*** IPUMS INFORMAL EMPLOYMENT FOR EACH SECTOR ***
*************************************************

* Builds dataset_informal_industry_ipums_t: employment-status shares within
* selected sectors, reshaped to one row per country-year with one set of
* columns per sector suffix.
** Created from the IPUMS data 
* See the "Ipums and other files" folder 
clear
use "dataset_industry_ipums_informality(09.22.2021).dta", clear
count
keep country industry year s_* 
order country industry s_*
tab industry
* Residual and unclassified industry categories are removed
drop if industry == "other_industry"
drop if industry == "other_services"
drop if industry == "supressed"
drop if industry == "unknowns"
drop if industry == "unspec_service"
tab industry
drop s_selfemployed_cooperative s_selfemployed_sharecropper 
order country year
* All s_* variables are multiplied by 100
foreach X of varlist s_* {
replace `X' = `X'*100
} 
* Check: total over the range s_selfemployed to s_unknown
egen test = rsum(s_selfemployed-s_unknown)
sum test, d
drop test
sum s_unknown, d
drop s_unknown
ren s_wage_worker wagewker
ren s_selfemployed selfwker
ren s_unpaidworker unpaidwker
ren s_other otherwker
* test = total of the four main categories; it is the denominator used below,
* also for the two self-employment sub-categories
egen test = rsum(wagewker selfwker unpaidwker otherwker)
sum test, d
ren s_selfemployed_employer selfwkerboss
ren s_selfemployed_ownaccount selfwkerown
foreach X of varlist wagewker selfwker unpaidwker otherwker selfwkerboss selfwkerown {
replace `X' = `X'/test*100
}
drop test 
* Check: the four main categories should now sum to 100
egen test = rsum(wagewker selfwker unpaidwker otherwker)
sum test, d
drop test
* Zeros are recoded to missing. The pattern *wker only matches names ending
* in wker, so selfwkerboss and selfwkerown are not touched.
foreach X of varlist *wker {
replace `X' = . if `X' == 0
}
* Unpaid and other workers combined; rsum counts a missing component as zero
egen unpothwker = rsum(unpaidwker otherwker)
drop otherwker
order wagewker selfwker unpaidwker unpothwker selfwkerboss selfwkerown
tab industry
* Short suffix per retained industry; industries without a suffix are dropped
gen indu = "_mfg" if industry == "mfg"
replace indu = "_tra" if industry == "trade"
replace indu = "_gvt" if industry == "govmt"
replace indu = "_fi" if industry == "finance"
replace indu = "_re" if industry == "buss_service"
replace indu = "_hh" if industry == "household_serv"
keep if indu != ""
drop industry
ren s_selfemployed_unspecified selfunspec
sum selfunspec, d
* Long to wide: the suffix in indu is appended to each stub name
reshape wide wagewker selfwker unpaidwker unpothwker selfwkerboss selfwkerown selfunspec, i(country year) j(indu) string
sort country year
save dataset_informal_industry_ipums_t, replace 

** Mean 1990-2020 **
* One row per country: the sample in 1990-2020 whose year is closest to 2000
* (ties go to the later year), with the sector-level status shares prefixed
* by ipuFI_.
use dataset_informal_industry_ipums_t, clear
destring year, replace
keep if inrange(year, 1990, 2020)
keep *wker* year country
generate dist2000 = abs(year - 2000)
gsort +country +dist2000 -year
order country dist2000 year
bysort country: keep if _n == 1
rename year mean_yr_info
foreach V of varlist *wker* {
    rename `V' ipuFI_`V'_t
}
* Harmonise the country names with the country_gjv spelling of the main file
rename country country_gjv
replace country_gjv = "Bolivia (Plurinational State of)" if country_gjv == "Bolivia"
replace country_gjv = "Egypt" if country_gjv == "Egypt, Arab Rep."
replace country_gjv = "Iran (Islamic Republic of)" if country_gjv == "Iran, Islamic Rep."
replace country_gjv = "Lao People's Democratic Republic" if country_gjv == "Lao PDR"
replace country_gjv = "United Republic of Tanzania" if country_gjv == "Tanzania"
replace country_gjv = "Venezuela (Bolivarian Republic of)" if country_gjv == "Venezuela, RB"
replace country_gjv = "Viet Nam" if country_gjv == "Vietnam"
sort country_gjv
save informal_industry_ipums_19902020_t, replace
* We merge with the main data set
* Old-style merge on the sorted key country_gjv, with the update option
use LACdata, clear
sort country_gjv
merge country_gjv using informal_industry_ipums_19902020_t, update
tabulate _merge
tabulate country_gjv if _merge == 2
* Drop the merge indicator and the bookkeeping columns of the using file
drop _merge mean_yr_info dist2000
save LACdata, replace

*************************
*** IPUMS URBAN SHARE ***
*************************

* Urban share implied by each IPUMS country-year sample
* Input created in the folder "IPUMS and other files"
use allregions_urbsh, clear
keep country year urbsh
rename urbsh urbsh_ipums
label variable urbsh_ipums "Implied urban share in the IPUMS country-years data sets"
* country keeps the IPUMS spelling; country_gjv carries the main-file spelling
generate country_gjv = country
replace country_gjv = "Bolivia (Plurinational State of)" if country_gjv == "Bolivia"
replace country_gjv = "Egypt" if country_gjv == "Egypt, Arab Rep."
replace country_gjv = "Iran (Islamic Republic of)" if country_gjv == "Iran, Islamic Rep."
replace country_gjv = "Lao People's Democratic Republic" if country_gjv == "Lao PDR"
replace country_gjv = "United Republic of Tanzania" if country_gjv == "Tanzania"
replace country_gjv = "Venezuela (Bolivarian Republic of)" if country_gjv == "Venezuela, RB"
replace country_gjv = "Viet Nam" if country_gjv == "Vietnam"
sort country_gjv year
save urbsh_ipums, replace

****************************
*** SUPER IPUMS DATA SET ***
****************************

* Country-year file combining the IPUMS urban share, the employment-status
* shares and the sectoral shares. Rows without industry or informality data
* are dropped at the end.
* Industry: suffix the sector shares with _t and flag the rows
use dataset_industry_ipums_t, clear
foreach V of varlist agri-other_industry {
    rename `V' `V'_t
}
generate indu_data_t_yn = 1
sort country_gjv year
save dataset_industry_ipums_t_2, replace
* Informal: suffix the status shares with _t and flag the rows
use dataset_informal_ipums_t, clear
foreach V of varlist wagewker-selfwkerown {
    rename `V' `V'_t
}
replace country = "Laos" if country == "Lao PDR"
replace country = "Laos" if country == "Laos "
generate informal_data_t_yn = 1
sort country year
save dataset_informal_ipums_t_2, replace
* Urban share: this is the master file for the merges below
use allregions_urbsh, clear
sort country year
keep country year urbsh
rename urbsh urbsh_ipums
replace country = "Laos" if country == "Lao PDR"
destring year, replace
* Adding informal (old-style merge on the sorted key country year)
sort country year
merge country year using dataset_informal_ipums_t_2
tabulate _merge
tabulate country year if _merge == 2
drop _merge
* Main-file spelling of the country names
generate country_gjv = country
replace country_gjv = "Bolivia (Plurinational State of)" if country_gjv == "Bolivia"
replace country_gjv = "Egypt" if country_gjv == "Egypt, Arab Rep."
replace country_gjv = "Iran (Islamic Republic of)" if country_gjv == "Iran, Islamic Rep."
replace country_gjv = "Lao People's Democratic Republic" if country_gjv == "Lao PDR"
replace country_gjv = "Lao People's Democratic Republic" if country_gjv == "Laos"
replace country_gjv = "United Republic of Tanzania" if country_gjv == "Tanzania"
replace country_gjv = "Venezuela (Bolivarian Republic of)" if country_gjv == "Venezuela, RB"
replace country_gjv = "Viet Nam" if country_gjv == "Vietnam"
count
* 214
codebook country_gjv
* Adding industry (old-style merge on the sorted key country_gjv year)
sort country_gjv year
merge country_gjv year using dataset_industry_ipums_t_2
tabulate _merge if ipu_indu_urb_imputed == 0
tabulate country_gjv year if ipu_indu_urb_imputed == 0 & _merge == 2
drop _merge
* keep if samples with IPUMS data * 
keep if indu_data_t_yn == 1 | informal_data_t_yn == 1
count
* 203
codebook country_gjv wagewker_* selfwker_*
* 63
order country_gjv year
sort country_gjv year
save superipums, replace

*******************************
***** TALL BUILDINGS DATA *****
*******************************

* Source of the building-level data: replication files of
* Jedwab, Remi & Loungani, Prakash & Yezer, Anthony, 2021. "Comparing cities in developed and developing countries: Population, land area, building height and crowding," Regional Science and Urban Economics, Elsevier, vol. 86(C).
* The data set only includes buildings above 80 meters.
use sky_final_add2, clear
count
* 25,214

* Restrict to two structure types and remove demolished / proposed / on-hold entries
keep if type == "Building" | type == "Tower-Building"
drop if status == "Demolished" | status == "Proposed" | status == "On Hold"
count
* 20,911

** CLEAN THE YEAR VARIABLE **
* Half-year values that occur in the data are rounded down to the calendar year
foreach y of numlist 1972 2002 2010 2011 2012 2013 2016 2017 2018 2019 2021 {
    replace year = `y' if year == `y'.5
}
* We keep the years 2021 2022 2024 because their construction started before,
* so their construction depends on the 2010s: they are assigned to 2020.
replace year = 2020 if inrange(year, 2020, 2024)
count
* 20,911

** CORRECT SOME SMALL MISTAKES **
* King's Reach Tower (id 12613) is assigned to London, United Kingdom
foreach v in country_sky countrylabel {
    replace `v' = "United Kingdom" if id == 12613 & name == "King's Reach Tower"
}
foreach v in city_sky citylabel {
    replace `v' = "London" if id == 12613 & name == "King's Reach Tower"
}
replace ccode = "GBR" if id == 12613 & name == "King's Reach Tower"

** ADD YEARS FOR SOME BUILDINGS **
* RTNC Congo Building = 1975
* source: http://www.wikinshasa.org/index.php/La_Voix_du_Peuple
replace year = 1975 if name == "RTNC Congo Building"
* Damascus Tower: 26 years ago, so 2019 - 26 = 1993
* source: https://pixzing.com/ig/mojeur
replace year = 1993 if name == "Damascus Tower"
* Tunis - Palais de Congress - 1969
* source: https://www.emporis.fr/city/100158/tunis-tunisia
replace year = 1969 if name == "Palais de Congress"
* Mali - BCEA - 1994
* source: https://fr.wikipedia.org/wiki/Tour_de_la_BCEAO_(Bamako)
replace year = 1994 if name == "BCEAO Tower"
* Rwanda 2011
* source: http://www.skyscrapercenter.com/building/kigali-city-tower/22763
* NOTE(review): the heading mentions 2011 but only the heights are set below;
* the year of Kigali City Tower is not touched - confirm this is intended.
foreach v in height2 hgt_arch_m {
    replace `v' = 90 if name == "Kigali City Tower"
}
* Puerto Rico => USA
replace country_wb = "United States" if citylabel == "San Juan (Puerto Rico)"
replace ccode = "USA" if country_wb == "United States"

* Keep buildings of at least 80 m, plus each country's 10 tallest buildings
* (rank 1 = tallest within country_wb; buildings without a height are removed first)
drop if height2 == . | height2 == 0
gsort country_wb -height2
bysort country_wb: gen rank = _n
order country_wb height2
keep if height2 >= 80 | rank <= 10
count
* 16,444
summarize height2, detail
save sky_new_v1, replace

use sky_new_v1, clear
* minht80 flags buildings of 80 m or more with a known year;
* maxminht80 flags countries that have at least one such building.
gen minht80 = (height2 != . & height2 >= 80 & year != .)
bysort country_wb: egen maxminht80 = max(minht80)
tabulate country_wb
gsort country_wb -height2
order country_wb height2 year name

* We find more information online and also correct some mistakes in the data set.

* Wyndham Aruba Beach Resort 1977
* Source: http://bb.visitaruba.com/f2/older-hotel-questions-8707/
replace year = 1977 if name == "Wyndham Aruba Beach Resort"

* BCEAO Building, Cotonou and Ouagadougou
* Source: https://www.bceao.int/sites/default/files/inline-files/chronologie_des_evenements_marquants_de_l_histoire_de_la_bceao_et_de_l_umoa.pdf
* NOTE(review): the original comments here read "BCEAO Building 1990", yet the
* code assigns 1977 (the same value as the Aruba line just above). The code is
* left unchanged - check the source and confirm which year is right.
replace year = 1977 if inlist(citylabel, "Cotonou", "Ouagadougou") & name == "BCEAO Building"

* BCEA Niamey 1990
* Source: https://www.bceao.int/sites/default/files/inline-files/chronologie_des_evenements_marquants_de_l_histoire_de_la_bceao_et_de_l_umoa.pdf
replace year = 1990 if citylabel == "Niamey" & name == "BCEAO Building"

* SSNIT Tower Block 1997
* Source: https://skyscraperpage.com/cities/?buildingID=32396
replace year = 1997 if citylabel == "Accra" & name == "SSNIT Tower Block"

* Asanbay 2A / Asanbay 3V
* Source: http://www.ees.energy.lth.se/fileadmin/ees/Publikationer/Ex5297-Bergstrom_Johannessen.pdf
replace year = 1965 if inlist(name, "Asanbay 2A", "Asanbay 3V")

* Liechtensteinische Post
* Source: https://en.wikipedia.org/wiki/Liechtensteinische_Post
replace year = 2000 if name == "Liechtensteinische Post"

* Al Khaima City Center Appart-hotel 2006
* Source: https://slidex.tips/download/building-success-name-mce-sa-head-office-7th-floor-al-khaima-city-center-10-rue
replace year = 2006 if name == "Al Khaima City Center Appart-hotel"

* Grand Hotel (Kathmandu)
* Source: https://www.hotels.com/ho548173/kathmandu-grand-hotel-kathmandu-nepal/
replace year = 2006 if name == "Grand Hotel" & citylabel == "Kathmandu"

* Qubaty Tower: was built in the 2000s, we use the mid-year "2005"
replace year = 2005 if citylabel == "Sana'a" & name == "Qubaty Tower"

* Ulice Slobode 10 / 12
* Source: http://skyscraperpage.com/cities/?cityID=2379
replace year = 1971 if inlist(name, "Ulice Slobode 10", "Ulice Slobode 12")

* There is a clear mistake with this one, we thus correct it in all four height variables.
foreach v in height2 height hgt_tip_m hgt_arch_m {
    replace `v' = 197.8152 if name == "Residences by Armani Casa"
}

save sky_new_v1, replace
count
* 16,444

* Distribution of heights, separately below and from 80 m
use sky_new_v1, clear
summarize height2 if height2 < 80, detail
summarize height2 if height2 >= 80, detail
* Values recorded by the authors for the >= 80 m sample:
* 25th = 100
* median = 125
* mean = 140
* 75th = 165
* 90th = 210
* 95th = 240
codebook height2

* We add 3 buildings that we found online.
* The spreadsheet is read, rows without a country are removed, and the result
* is appended to the building file.
import excel "new_build_08222019.xlsx", sheet("Sheet1") firstrow clear
drop if country_wb == ""
save extrabuild, replace
count
use sky_new_v1, clear
append using extrabuild
save sky_new_v1, replace
count
* 16447
* We now compute the main building variables 

***** USING ALL BUILDINGS *****

* Cumulative stock of buildings by type, all heights.
* For each benchmark year (1960, 1965, ..., 2020) we keep the buildings with
* year <= benchmark and add up, by country, their number and their heights
* for each building type.
foreach Y of numlist 1960(5)2020 {
    use sky_new_v1, clear
    count
    keep if year <= `Y'
    gen height2All = height2
    * Type dummies built here: nores (office, hotel or retail), all, priv (gover == 0)
    gen nores = (offic == 1 | hotel == 1 | retai == 1)
    gen all = 1
    gen priv = (gover == 0)
    foreach T in all resid nores offic hotel retai gover priv {
        gen num`T' = 1 if `T' == 1
        gen height`T' = height2All if `T' == 1
    }
    drop height2All height2
    * Country-level totals
    collapse (sum) height* num*, by(countrylabel ccode)
    foreach T in all resid nores offic hotel retai gover priv {
        rename height`T' sumht_`T'
        rename num`T' numb_`T'
        * A total of zero means "no such building": stored as missing
        replace sumht_`T' = . if sumht_`T' == 0
        replace numb_`T' = . if numb_`T' == 0
        label var sumht_`T' "Sum of heights (km) for `T' buildings"
        label var numb_`T' "Number of buildings for `T' buildings"
    }
    gen year = `Y'
    sort ccode year
    save stock_yr`Y', replace
}

* Stack the benchmark-year files into a country-year data set
use stock_yr1960, clear
foreach Y of numlist 1965(5)2020 {
    append using stock_yr`Y'
}
sort ccode year
save stock, replace
use stock, clear
codebook sumht_all
order ccode year sumht_all
sort ccode year

* Add to the main data set; stock rows without a match in LACdata are discarded
use LACdata, clear
sort ccode year
merge ccode year using stock
tabulate _merge
drop if _merge == 2
drop _merge
codebook sumht_all
* We do not need the "number" variables (nor the collapsed raw -height-);
* we focus on the sum of heights instead.
drop num* height
save LACdata, replace

***** USING BUILDINGS ABOVE CERTAIN HEIGHTS *****

* Cumulative stock of buildings by type, restricted to buildings at or above
* a minimum height. Same construction as the all-buildings stock, repeated
* for each height threshold in `cutoffs'.
* Change relative to the earlier version: the variable labels now state the
* threshold. Previously all 11 variants of a given type carried the same
* label and could not be told apart in -describe- / -codebook- output.
local cutoffs 100 125 130 140 150 160 165 170 200 210 240
foreach Y of numlist 1960(5)2020 {
    use sky_new_v1, clear
    count
    keep if year <= `Y'
    gen height2All = height2
    gen nores = (offic == 1 | hotel == 1 | retai == 1)
    * For each type:
    gen all = 1
    gen priv = (gover == 0)
    foreach N of local cutoffs {
        foreach Z in all resid nores offic hotel retai gover priv {
            * The explicit "!= ." is required: a missing height compares as
            * larger than any number and would otherwise pass ">= N".
            gen num`N'`Z' = 1 if `Z' == 1 & height2All >= `N' & height2All != .
            gen height`N'`Z' = height2All if `Z' == 1 & height2All >= `N' & height2All != .
        }
    }
    drop height2All height2
    * We collapse at the country level.
    collapse (sum) height* num*, by(countrylabel ccode)
    foreach N of local cutoffs {
        foreach X in all resid nores offic hotel retai gover priv {
            ren height`N'`X' sumht`N'_`X'
            ren num`N'`X' numb`N'_`X'
            * A total of zero means "no such building": stored as missing
            replace sumht`N'_`X' = . if sumht`N'_`X' == 0
            replace numb`N'_`X' = . if numb`N'_`X' == 0
            label var sumht`N'_`X' "Sum of heights (km) for `X' buildings >= `N' m"
            label var numb`N'_`X' "Number of buildings for `X' buildings >= `N' m"
        }
    }
    gen year = `Y'
    sort ccode year
    save stock_yr`Y', replace
}
* We combine these annual stocks to create a data set at the country-year level.
use stock_yr1960, clear
foreach Y of numlist 1965(5)2020 {
    append using stock_yr`Y'
}
sort ccode year
save stock, replace
* We add to the main data set
use LACdata, clear
*keep if year >= 1960 & year <= 2020
sort ccode year
merge ccode year using stock
tab _m
*drop if _m == 3
drop if _m == 2
drop _m
* The count variables are not kept
drop num_* numb*
save LACdata, replace

***** USING VANITY MEASURE *****

use sky_new_v1, clear
* We employ measures of "vanity height," defined as the distance between the
* highest occupiable floor and the architectural top of the structure.
* We try out different specifications, including ln(vanity meters+1), number
* of vanity meters, and vanity meters/total building height, both for the
* entire building-mass of skyscrapers and only for "signal buildings".
* Our main measure of height is based on "height to the tip".
correlate hgt_tip_m hgt_arch_m
count if !(hgt_tip_m == . & hgt_arch_m == .)

* NOTE(review): the intercepts and slopes used in the two -replace- lines
* below are typed in by hand; they are not read from the regressions that
* precede them (e.g. via _b[]). If the building file changes, the regression
* output and these constants can drift apart - verify after any data update.
* Also note the second regression runs after the tip height has been filled in.

** Predicting tip from arch where tip is missing **
regress hgt_tip_m hgt_arch_m, vce(robust)
replace hgt_tip_m = .3462614 + 1.002307*hgt_arch_m if hgt_tip_m == .
count if hgt_tip_m != .

** Predicting arch from tip where arch is missing **
regress hgt_arch_m hgt_tip_m, vce(robust)
replace hgt_arch_m = .5406564 + .991524*hgt_tip_m if hgt_arch_m == .
count if hgt_arch_m != .

* The filled-in heights overwrite the building file
save sky_new_v1, replace
codebook hgt_tip_m hgt_arch_m hgt_occp_m
* Share of the stock not missing: for each benchmark year, the country total
* of occupied height as a percentage of the country total of tip height
* (shtip) and of architectural height (sharch).
foreach Y of numlist 1960(5)2020 {
    use sky_new_v1, clear
    keep if year <= `Y'
    collapse (sum) height2 hgt_tip_m hgt_arch_m hgt_occp_m, by(countrylabel ccode)
    gen year = `Y'
    gen shtip = hgt_occp_m/hgt_tip_m*100 if hgt_occp_m != . & hgt_tip_m != .
    gen sharch = hgt_occp_m/hgt_arch_m*100 if hgt_occp_m != . & hgt_arch_m != .
    keep ccode year shtip sharch
    sort ccode year
    save shtiparch`Y', replace
}

* Stack the benchmark-year files into a country-year data set
use shtiparch1960, clear
foreach Y of numlist 1965(5)2020 {
    append using shtiparch`Y'
}
sort ccode year
save shtiparch, replace
* Cumulative "vanity height" stocks by building type.
* ht_tipocp  = tip height           - occupied height
* ht_archocp = architectural height - occupied height
* Both are summed by country over the buildings with year <= benchmark year.
foreach Y of numlist 1960(5)2020 {
    use sky_new_v1, clear
    count
    keep if year <= `Y'
    gen ht_tipocp = hgt_tip_m - hgt_occp_m
    gen ht_archocp = hgt_arch_m - hgt_occp_m
    * Type dummies built here: nores (office, hotel or retail), all, priv (gover == 0)
    gen nores = (offic == 1 | hotel == 1 | retai == 1)
    gen all = 1
    gen priv = (gover == 0)
    foreach T in all resid nores offic hotel retai gover priv {
        gen height_tipocp`T' = ht_tipocp if `T' == 1
        gen height_archocp`T' = ht_archocp if `T' == 1
    }
    drop height2
    * Country-level totals
    collapse (sum) height_*, by(countrylabel ccode)
    foreach M in tipocp archocp {
        foreach T in all resid nores offic hotel retai gover priv {
            rename height_`M'`T' sumht`M'_`T'
            * A total of zero is stored as missing
            replace sumht`M'_`T' = . if sumht`M'_`T' == 0
            label var sumht`M'_`T' "Sum of heights (km) for `T' buildings"
        }
    }
    gen year = `Y'
    sort ccode year
    save stock_yr`Y', replace
}

* Stack the benchmark-year files into a country-year data set
use stock_yr1960, clear
foreach Y of numlist 1965(5)2020 {
    append using stock_yr`Y'
}
sort ccode year
save stock, replace

* Add to the main data set; stock rows without a match in LACdata are discarded
use LACdata, clear
sort ccode year
merge ccode year using stock
tabulate _merge
drop if _merge == 2
drop _merge
save LACdata, replace

*** SUM HEIGHT DIFFERENT MATERIALS ***

* We re-add the original material variable, available in the file "material".
* Composite is a combination of steel/concrete in the structural material
* rather than only steel or only concrete.
* Output: file "temp" with one material class per building id
* (concrete, steel, other, composite).
use sky_new_v1, clear
sort id
merge id using material
tabulate _merge
drop if _merge == 2
* Buildings absent from the material file (these 3 were added manually)
* take the material recorded in the building file.
replace Material = material if _merge == 1
drop _merge
tabulate material Material, missing
tabulate Material, missing
keep id Material
drop if Material == "x" | Material == ""
* Recode to four classes
replace Material = "concrete" if inlist(Material, "precast", "Concrete")
replace Material = "other" if inlist(Material, "wood", "masonry")
* Everything that is not concrete, steel or other is composite; this covers
* "concrete/steel" and "steel/concrete" as well as any remaining value.
replace Material = "composite" if !inlist(Material, "concrete", "steel", "other")
tabulate Material, missing
sort id
save temp, replace
* Cumulative sum of heights by structural material and building type.
* NOTE(review): rows that exist only in "temp" after the merge (_merge == 2)
* are NOT dropped. They carry no country code and no height, so they add
* nothing to the totals, but they do make every material class present in
* each benchmark year. Dropping them could leave -reshape- without one of the
* material suffixes and break the renames below - keep as is unless verified.
local stubs height2_all height2_resid height2_nores height2_offic height2_hotel height2_retai height2_priv height2_gover
foreach Y of numlist 1960(5)2020 {
    use sky_new_v1, clear
    keep if year <= `Y'
    drop if material == "x" | material == ""
    sort id
    merge id using temp
    tabulate _merge
    * Replace the raw material string by the recoded class from "temp"
    drop material
    rename Material material
    tabulate material, missing
    gen all = 1
    gen nores = (offic == 1 | hotel == 1 | retai == 1)
    gen priv = (gover == 0)
    foreach T in all resid nores offic hotel retai gover priv {
        gen height2_`T' = height2 if `T' == 1
    }
    drop height2
    * Four-letter codes that become variable-name suffixes in the reshape
    replace material = "conc" if material == "concrete"
    replace material = "stel" if material == "steel"
    replace material = "comp" if material == "composite"
    replace material = "othr" if material == "other"
    collapse (sum) height2*, by(countrylabel ccode material)
    gen year = `Y'
    drop countrylabel
    reshape wide `stubs', i(ccode) j(material) string
    * Only these five types are renamed; the offic / hotel / retai columns keep
    * their height2_ prefix and are removed after the merge further down.
    foreach M in conc stel othr comp {
        foreach T in all resid nores gover priv {
            rename height2_`T'`M' sumht`M'_`T'
            replace sumht`M'_`T' = . if sumht`M'_`T' == 0
            label var sumht`M'_`T' "Sum of heights (km) for `T' buildings"
        }
    }
    sort ccode year
    save stock_yr`Y', replace
}

* Stack the benchmark-year files into a country-year data set
use stock_yr1960, clear
foreach Y of numlist 1965(5)2020 {
    append using stock_yr`Y'
}
sort ccode year
save stock, replace

* Add to the main data set; stock rows without a match in LACdata are discarded
use LACdata, clear
sort ccode year
merge ccode year using stock
tabulate _merge
drop if _merge == 2
drop _merge
drop height2_*
save LACdata, replace

*** STOCKS BY CAPITAL / LARGEST-CITY STATUS ***

* Six versions of the cumulative sum of heights by building type, each on a
* different subset of cities. The versions used to be six copy-pasted
* sections that differed only in three things, now set per iteration:
*   V    : suffix of the files written (stock_yr<Y>_<V>, stock_<V>)
*   tag  : infix of the variables created (sumht<tag>_<type>)
*   rule : the sample restriction, executed as a command
*
*   V          tag    sample
*   nocaplar   nolc   without capital & largest city
*   caplar     lc     only capital & largest city
*   lar        l      largest city only
*   nolar      nol    without the largest city
*   cap        c      capital city only
*   nocap      noc    without capital city
*
* The versions are processed in the same order as before, and the same files
* and variables are produced.
foreach V in nocaplar caplar lar nolar cap nocap {
    if "`V'" == "nocaplar" {
        local tag nolc
        local rule "drop if capital == 1 | largest == 1"
    }
    else if "`V'" == "caplar" {
        local tag lc
        local rule "keep if capital == 1 | largest == 1"
    }
    else if "`V'" == "lar" {
        local tag l
        local rule "keep if largest == 1"
    }
    else if "`V'" == "nolar" {
        local tag nol
        local rule "drop if largest == 1"
    }
    else if "`V'" == "cap" {
        local tag c
        local rule "keep if capital == 1"
    }
    else {
        local tag noc
        local rule "drop if capital == 1"
    }

    * We create the existing stocks of heights for each benchmark year:
    foreach Y of numlist 1960(5)2020 {
        use sky_new_v1, clear
        count
        codebook countrylabel
        * Attach the capital / largest-city flags from list_capitals
        sort countrylabel citylabel
        merge countrylabel citylabel using list_capitals
        tab _m
        drop if _m == 2
        drop _m
        * Sample restriction for this version
        `rule'
        keep if year <= `Y'
        gen all = 1
        gen nores = (offic == 1 | hotel == 1 | retai == 1)
        gen priv = (gover == 0)
        foreach X in all resid nores offic hotel retai gover priv {
            gen height2_`X' = height2 if `X' == 1
        }
        drop height2
        * We collapse at the country level.
        collapse (sum) height2*, by(countrylabel ccode)
        foreach X in all resid nores offic hotel retai gover priv {
            ren height2_`X' sumht`tag'_`X'
            * A total of zero is stored as missing
            replace sumht`tag'_`X' = . if sumht`tag'_`X' == 0
            label var sumht`tag'_`X' "Sum of heights (km) for `X' buildings"
        }
        gen year = `Y'
        sort ccode year
        save stock_yr`Y'_`V', replace
    }

    * We combine these annual stocks to create a data set at the country-year level.
    use stock_yr1960_`V', clear
    foreach Y of numlist 1965(5)2020 {
        append using stock_yr`Y'_`V'
    }
    sort ccode year
    save stock_`V', replace

    * We add to the main data set; stock rows without a match are discarded.
    use LACdata, clear
    sort ccode year
    merge ccode year using stock_`V'
    tab _m
    * Diagnostics on unmatched main-data rows
    tab country_wb if _m == 1 & year >= 1960
    tab year if country_wb == "Japan" & _m == 1
    drop if _m == 2
    drop _m
    save LACdata, replace
}

**************
*** CEMENT ***
**************

* Source: USGS
* HYDRAULIC CEMENT: WORLD PRODUCTION, BY COUNTRY OR LOCALITY
* (Thousand metric tons)
* The sheet is wide (one y<year> column per year). It is reshaped to
* country-year; zeros are treated as missing, and yi holds the linearly
* interpolated value only where the reported value y is missing.
import excel "cement_all_yrs_modified.xls", sheet("Sheet1") firstrow clear
* Rows flagged in the sheet's own "drop" column, and rows without a country, are removed
drop if drop == 1
drop if country == ""
drop y2017new drop
reshape long y, i(country) j(year)
sort country year
replace y = . if y == 0
bysort country: ipolate y year, gen(yi)
* Blank out yi wherever it merely reproduces a reported value
replace yi = . if yi == y
sort country year
save cement_all_yrs_modified, replace

*** Cement for 5 year periods ***
* Reported production, completed with the interpolated values, is averaged
* within each period and multiplied by 5. Periods are named after their end
* year; the first one (1965) spans 1960-1965.
use cement_all_yrs_modified, clear
codebook country
* 166 countries
keep if year >= 1960
sort country year
rename y cement
rename yi cementi
replace cement = cementi if cement == .
drop cementi
codebook cement
* Number of years with data per country (diagnostic)
gen count = 1 if cement != .
bysort country: egen sumcount = sum(count)
tabulate sumcount
tabulate country if sumcount < 50
* Harmonise the country names to the country_wb convention
gen country_wb = country
drop country
replace country_wb = "Myanmar"                   if country_wb == "Burma"
replace country_wb = "Congo, Rep."               if country_wb == "Congo (Brazzaville)"
replace country_wb = "Congo, Dem. Rep."          if country_wb == "Congo (Kinshasa)"
replace country_wb = "Czech Republic"            if country_wb == "Czechia"
replace country_wb = "Egypt, Arab Rep."          if country_wb == "Egypt"
replace country_wb = "Hong Kong SAR, China"      if country_wb == "Hong Kong"
replace country_wb = "Iran, Islamic Rep."        if country_wb == "Iran"
replace country_wb = "Côte d'Ivoire"             if country_wb == "Ivory Coast"
replace country_wb = "Korea, Dem. People's Rep." if country_wb == "Korea, North"
replace country_wb = "Korea, Rep."               if country_wb == "Korea, Republic of"
replace country_wb = "Kyrgyz Republic"           if country_wb == "Kyrgyzstan"
replace country_wb = "Lao PDR"                   if country_wb == "Laos"
replace country_wb = "Macao SAR, China"          if country_wb == "Macau"
replace country_wb = "North Macedonia"           if country_wb == "Macedonia"
replace country_wb = "Russian Federation"        if country_wb == "Russia"
replace country_wb = "Slovak Republic"           if country_wb == "Slovakia"
replace country_wb = "Syrian Arab Republic"      if country_wb == "Syria"
replace country_wb = "Venezuela, RB"             if country_wb == "Venezuela"
replace country_wb = "Yemen, Rep."               if country_wb == "Yemen"
* Taiwan ends up as "Taiwan" whichever of the two spellings the source uses
replace country_wb = "Taiwan"                    if country_wb == "Republic of China (Taiwan)"
save cement_temp, replace
* Period = end year of the 5-year window (years after 2020 stay unassigned)
gen period = .
replace period = 1965 if inrange(year, 1960, 1965)
forvalues p = 1970(5)2020 {
    replace period = `p' if inrange(year, `p' - 4, `p')
}
tabulate year period, missing
collapse (mean) cement, by(country_wb period)
rename period year
* sum of cement production in 5-year period (period mean times 5)
replace cement = cement*5
label var cement "sum of cement production in last 5-year period"
codebook cement
sort country_wb year
save cement, replace

*** Cement for 10 yr periods ***
* Mean annual production (reported, completed with interpolated values)
* within each 10-year window; windows are named after their end year and the
* first one (1960) spans 1950-1960. Output variable: cement10.
* Change relative to the earlier version: after the rename, the label and
* codebook commands referred to "cement", which only resolved to cement10
* through variable-name abbreviation (and fails under -set varabbrev off-).
* They now name cement10 explicitly. The comment before the rename also said
* "sum" although the collapse computes a mean.
use cement_all_yrs_modified, clear
codebook country
* 166 countries
sort country year
ren y cement
ren yi cementi
replace cement = cementi if cement == .
drop cementi
codebook cement
* Data availability in selected years (diagnostic)
foreach y of numlist 1970 1965 1960 1955 1950 {
    count if cement != . & year == `y'
}
* Harmonise the country names to the country_wb convention
gen country_wb = country
drop country
replace country_wb = "Myanmar" if country_wb == "Burma"
replace country_wb = "Congo, Rep." if country_wb == "Congo (Brazzaville)"
replace country_wb = "Congo, Dem. Rep." if country_wb == "Congo (Kinshasa)"
replace country_wb = "Czech Republic" if country_wb == "Czechia"
replace country_wb = "Egypt, Arab Rep." if country_wb == "Egypt"
replace country_wb = "Hong Kong SAR, China" if country_wb == "Hong Kong"
replace country_wb = "Iran, Islamic Rep." if country_wb == "Iran"
replace country_wb = "Côte d'Ivoire" if country_wb == "Ivory Coast"
replace country_wb = "Korea, Dem. People's Rep." if country_wb == "Korea, North"
replace country_wb = "Korea, Rep." if country_wb == "Korea, Republic of"
replace country_wb = "Kyrgyz Republic" if country_wb == "Kyrgyzstan"
replace country_wb = "Lao PDR" if country_wb == "Laos"
replace country_wb = "Macao SAR, China" if country_wb == "Macau"
replace country_wb = "North Macedonia" if country_wb == "Macedonia"
replace country_wb = "Russian Federation" if country_wb == "Russia"
replace country_wb = "Slovak Republic" if country_wb == "Slovakia"
replace country_wb = "Syrian Arab Republic" if country_wb == "Syria"
replace country_wb = "Venezuela, RB" if country_wb == "Venezuela"
replace country_wb = "Yemen, Rep." if country_wb == "Yemen"
* The former Taiwan -> "Republic of China (Taiwan)" -> Taiwan round trip is
* reduced to its net effect: both spellings end up as "Taiwan".
replace country_wb = "Taiwan" if country_wb == "Republic of China (Taiwan)"
* Period = end year of the window (years after 2020 stay unassigned)
gen period = .
replace period = 1960 if year >= 1950 & year <= 1960
replace period = 1970 if year >= 1961 & year <= 1970
replace period = 1980 if year >= 1971 & year <= 1980
replace period = 1990 if year >= 1981 & year <= 1990
replace period = 2000 if year >= 1991 & year <= 2000
replace period = 2010 if year >= 2001 & year <= 2010
replace period = 2020 if year >= 2011 & year <= 2020
tab year period, m
collapse (mean) cement, by(country_wb period)
ren period year
* mean of annual cement production in the 10-year period
ren cement cement10
label var cement10 "mean of cement production in last 10 yrs"
codebook cement10
sort country_wb year
save cement10, replace

*** Cement for 20 year periods ***
* Mean annual production (reported, completed with interpolated values)
* within each 20-year window; windows are named after their end year and the
* first one (1960) spans 1950-1960. Output variable: cement20.
* Change relative to the earlier version: after the rename, the label and
* codebook commands referred to "cement", which only resolved to cement20
* through variable-name abbreviation (and fails under -set varabbrev off-).
* They now name cement20 explicitly. The comment before the rename also said
* "sum" although the collapse computes a mean.
use cement_all_yrs_modified, clear
codebook country
* 166 countries
sort country year
ren y cement
ren yi cementi
replace cement = cementi if cement == .
drop cementi
codebook cement
* Data availability in selected years (diagnostic)
foreach y of numlist 1970 1965 1960 1955 1950 {
    count if cement != . & year == `y'
}
* Harmonise the country names to the country_wb convention
gen country_wb = country
drop country
replace country_wb = "Myanmar" if country_wb == "Burma"
replace country_wb = "Congo, Rep." if country_wb == "Congo (Brazzaville)"
replace country_wb = "Congo, Dem. Rep." if country_wb == "Congo (Kinshasa)"
replace country_wb = "Czech Republic" if country_wb == "Czechia"
replace country_wb = "Egypt, Arab Rep." if country_wb == "Egypt"
replace country_wb = "Hong Kong SAR, China" if country_wb == "Hong Kong"
replace country_wb = "Iran, Islamic Rep." if country_wb == "Iran"
replace country_wb = "Côte d'Ivoire" if country_wb == "Ivory Coast"
replace country_wb = "Korea, Dem. People's Rep." if country_wb == "Korea, North"
replace country_wb = "Korea, Rep." if country_wb == "Korea, Republic of"
replace country_wb = "Kyrgyz Republic" if country_wb == "Kyrgyzstan"
replace country_wb = "Lao PDR" if country_wb == "Laos"
replace country_wb = "Macao SAR, China" if country_wb == "Macau"
replace country_wb = "North Macedonia" if country_wb == "Macedonia"
replace country_wb = "Russian Federation" if country_wb == "Russia"
replace country_wb = "Slovak Republic" if country_wb == "Slovakia"
replace country_wb = "Syrian Arab Republic" if country_wb == "Syria"
replace country_wb = "Venezuela, RB" if country_wb == "Venezuela"
replace country_wb = "Yemen, Rep." if country_wb == "Yemen"
* The former Taiwan -> "Republic of China (Taiwan)" -> Taiwan round trip is
* reduced to its net effect: both spellings end up as "Taiwan".
replace country_wb = "Taiwan" if country_wb == "Republic of China (Taiwan)"
* Period = end year of the window (years after 2020 stay unassigned)
gen period = .
replace period = 1960 if year >= 1950 & year <= 1960
replace period = 1980 if year >= 1961 & year <= 1980
replace period = 2000 if year >= 1981 & year <= 2000
replace period = 2020 if year >= 2001 & year <= 2020
tab year period, m
collapse (mean) cement, by(country_wb period)
ren period year
* mean of annual cement production in the 20-year period
ren cement cement20
label var cement20 "mean of cement production in last 20 yrs (10 yrs for 1960)"
codebook cement20
sort country_wb year
save cement20, replace

* We combine the 5-year cement series with the main data set.
* Changes relative to the earlier version: cement19602017 now gets a variable
* label (its 1970 counterpart already had one), and the comment of the
* "FROM 1970" part no longer says 1960.
use LACdata, clear
sort country_wb year
merge country_wb year using cement
tab _m
tab country_wb if _m == 1
* maxmerge == 3 identifies countries that appear in the cement file
bysort country_wb: egen maxmerge = max(_m)
tab maxmerge
* Countries with missing cement data.
drop if _m == 2
drop _m
codebook cement if year >= 1965 & maxmerge == 3
tab ccodeyr if year >= 1965 & maxmerge == 3 & cement == .
codebook year if year >= 1965
drop maxmerge
** FROM 1960 **
* this is for the countries for which the data is complete from 1960,
* i.e. all 12 five-year periods 1965, 1970, ..., 2020 are non-missing
gen count = 1 if cement != . & year >= 1965
bysort country_wb: egen sumcount = sum(count)
tab sumcount
gen cement_full60 = cement if sumcount == 12
gen cement_full60_yn = (sumcount == 12)
drop count sumcount
label var cement_full60 "sum of cement prod in last 5 years if all yrs available since 1960"
label var cement_full60_yn "dummy if cement prod in last 5 years available in all yrs since 1960"
* sum cement production 1960-2017 for countries with full data from 1960
bysort country_wb: egen cement19602017 = sum(cement_full60) if cement_full60 != .
label var cement19602017 "full sum of cement prod 1960-2017"
** FROM 1970 **
* this is for the countries for which the data is complete from 1970,
* i.e. all 10 five-year periods 1975, 1980, ..., 2020 are non-missing
gen count = 1 if cement != . & year >= 1975
bysort country_wb: egen sumcount = sum(count)
tab sumcount
gen cement_full70 = cement if sumcount == 10
gen cement_full70_yn = (sumcount == 10)
drop count sumcount
label var cement_full70 "sum of cement prod in last 5 years if all yrs available since 1970"
label var cement_full70_yn "dummy if cement prod in last 5 years available in all yrs since 1970"
* sum cement production 1970-2017 for countries with full data from 1970
bysort country_wb: egen cement19702017 = sum(cement_full70) if cement_full70 != .
label var cement19702017 "full sum of cement prod 1970-2017"
* cement production past 10 yrs (cement10)
sort country_wb year
merge country_wb year using cement10
tab _m
drop if _m == 2
drop _m
* sum of cement production past 20 yrs
sort country_wb year 
merge country_wb year using cement20
tab _m
drop if _m == 2
drop _m
tab ccodeyr if cement20 == . & (year == 1960 | year == 1980 | year == 2000 | year == 2020)
count if year == 1960 | year == 1980 | year == 2000 | year == 2020
gen lcement10 = log(cement10) 
gen lcement20 = log(cement20) 
label var lcement10 "log of cement10"
label var lcement20 "log of cement20"
sort ccode year
save LACdata, replace

*** WE CLEAN THE DATA ***

* Merge shtiparch into LACdata and build the urban height density measures.
use LACdata, clear
drop if year == .
sort ccode year
merge ccode year using shtiparch
tab _m
tab ccode if _m == 2
* rows that only exist in shtiparch are discarded
drop if _m == 2
drop _m
sum shtip if year == 2020
sum sharch if year == 2020
bysort year: count if shtip > 0 & shtip != .
bysort year: count if sharch > 0 & sharch != .
sum sumht_all if year == 2020
label var shtip "Ratio of sum of occupied height to sum of height to tip"
label var sharch "Ratio of sum of occupied height to sum of architectural height"

* urban height density *
sum sumht_all, d
* MAIN SUM - minimum 32.004
codebook sumht_all
sum sumht_*
** 2 SOLUTIONS: EITHER GIVE MIN VALUE OR DO THE +1 **
* The "+1" solution is the one applied: missing heights are set to 0 and every
* height sum is shifted by 1 so that the logs below are defined.
* (The alternative, replacing missing values by 32.004, is not used.)
foreach ht of varlist sumht* {
    replace `ht' = 0 if `ht' == .
    replace `ht' = `ht'+1
}
codebook sumht_*

** we now create the measures of urban height density **
* for that, we need urban pop
sum pop, d
* pop is in 000s, so upop is in 000s too
gen upop = pop/100*urbrate
label var upop "Urban pop (000s)"
* height is in m, so dividing by upop/1000 gives m per million urban inh.

* HEIGHT, by building use
foreach use in all resid nores offic hotel retai gover priv {
    gen uht_`use' = sumht_`use'/(upop/1000)
    sum uht*, d
    gen luht_`use' = log(uht_`use')
    label var uht_`use' "m of heights per million urban inh."
    label var luht_`use' "Log m of heights per million urban inh."
}

* HEIGHT, by sumht group suffix crossed with building use
foreach grp in 100 125 130 140 150 160 165 170 200 210 240 conc comp othr stel {
    foreach use in all resid nores priv gover {
        gen uht`grp'_`use' = sumht`grp'_`use'/(upop/1000)
        sum uht*, d
        gen luht`grp'_`use' = log(uht`grp'_`use')
        label var uht`grp'_`use' "m of heights per million urban inh, type = `use'"
        label var luht`grp'_`use' "Log m of heights per million urban inh, type = `use'"
    }
}

* VANITY - m per million urban inh.
foreach grp in tipocp archocp {
    foreach use in all resid nores priv gover {
        gen uht`grp'_`use' = sumht`grp'_`use'/(upop/1000)
        sum uht*, d
        gen luht`grp'_`use' = log(uht`grp'_`use')
        label var uht`grp'_`use' "vanity measure, `grp', `use'"
        label var luht`grp'_`use' "log vanity measure, `grp', `use'"
    }
}

* LARGEST/CAPITAL OR NOT
foreach grp in lc nolc l nol c noc {
    foreach use in all resid nores priv gover {
        gen uht`grp'_`use' = sumht`grp'_`use'/(upop/1000)
        sum uht*, d
        gen luht`grp'_`use' = log(uht`grp'_`use')
        label var uht`grp'_`use' "urban height density, `grp' city, `use'"
        label var luht`grp'_`use' "log urban height density, `grp' city, `use'"
    }
}
save LACdata, replace

******************
*** SLUM SHARE ***
******************

* Source: United Nations, "Proportion of the Urban Population Living in Slums, United Nations, SDG Global Database., 2020.
* Last accessed 10-11-2020
* Step 1: reduce the slum file to the country key and the slum* variables,
* align two country spellings with those used in LACdata, and save it sorted.
use dataslum, clear
keep country_wb slum*
replace country_wb = "Côte d'Ivoire" if country_wb == "Cote d'Ivoire"
replace country_wb = "Eswatini"      if country_wb == "Swaziland"
sort country_wb
save slumsh, replace

* Step 2: attach the slum variables to every row of the matching country in
* the main data; countries present only in the slum file are discarded.
use LACdata, clear
sort country_wb
merge country_wb using slumsh
tab _m
drop if _m == 2
drop _m
save LACdata, replace

*********************************
*** GDP SHARE OF CONSTRUCTION ***
*********************************

* Source: United Nations, "System of National Accounts (SNA) - Analysis of Main Aggregates (AMA) Database," 2020.
* Last accessed: 02-19-2021
* United Nations Data as main source
* Created in the folder "Other Files"
* Prepare the construction-share file: keep the constr_gdp_ma2 series, inspect
* its coverage, rename it to constr_sh_gdp and save it sorted on ccode year.
use "constr_gdp(02.19.21).dta", clear
codebook ccode
tab year
codebook constr_gdp*
keep ccode year constr_gdp_ma2
* temporary country-year id, only used to list the missing observations
gen ccodeyr = ccode+string(year)
* The variable is referred to by its full name: the short form "constr_gdp"
* only resolved through variable abbreviation.
tab ccodeyr if constr_gdp_ma2 == .
tab year if constr_gdp_ma2 == .
tab year if constr_gdp_ma2 != .
sum constr_gdp_ma2, d
ren constr_gdp_ma2 constr_sh_gdp
drop ccodeyr
sort ccode year
save construction, replace

* We combine the construction share with the main data
use LACdata, clear
sort ccode year
merge ccode year using construction, update
tab _m
* NOTE(review): unlike the other merges in this file, rows found only in the
* using file (_m == 2) are not dropped here -- confirm this is intended.
drop _m

* Increases in NRX mechanically decrease in CONSTR
* We thus obtain non-NRX GDP
gen gdp = pcgdp*pop
sum nrxag_sh_gdp, d
sum constr_sh_gdp, d
* GDP levels of the sectors (shares are in %)
gen gdp_nrxag   = gdp/100*nrxag_sh_gdp
gen gdp_mfgserv = gdp/100*mfgservshare
* GDP without these sectors
gen gdp_nonnrxag   = gdp/100*(100-nrxag_sh_gdp)
gen gdp_nonmfgserv = gdp/100*(100-mfgservshare)
gen gdp_nonboth    = gdp/100*(100-nrxag_sh_gdp-mfgservshare)
gen gdp_const      = gdp/100*constr_sh_gdp

* Construction GDP relative to each of the three reduced GDP aggregates (%)
gen constr_sh_gdp_nonrxag = gdp_const/gdp_nonnrxag*100
sum constr_sh_gdp_nonrxag, d
label var constr_sh_gdp_nonrxag "Construction GDP as a share of non-NRX GDP"

gen constr_sh_gdp_nonmfgserv = gdp_const/gdp_nonmfgserv*100
sum constr_sh_gdp_nonmfgserv, d
label var constr_sh_gdp_nonmfgserv "Construction GDP as a share of non-MFGSERV GDP"

gen constr_sh_gdp_nonboth = gdp_const/gdp_nonboth*100
sum constr_sh_gdp_nonboth, d
* the GDP levels were only needed to form the ratios above
drop gdp gdp_*
label var constr_sh_gdp_nonboth "Construction GDP as a share of non-NRX+MFGSERV GDP"

* values in 1960 1970, copied to every row of the country *
foreach yr in 1960 1970 {
    foreach v in constr_sh_gdp constr_sh_gdp_nonrxag constr_sh_gdp_nonmfgserv constr_sh_gdp_nonboth {
        gen `v'_`yr' = `v' if year == `yr'
        bysort ccode: egen `v'`yr' = max(`v'_`yr')
        drop `v'_`yr'
        label var `v'`yr' "Value of variable `v' in `yr'"
    }
}
save LACdata, replace

*************************
*** FUEL EXPORTS DATA ***
*************************

* Fuels exports only
* Created in the folder "IPUMS and Other Files"
* The fuel file is saved sorted on the merge key.
use "dataset_fuels(10.04.2021).dta", clear
sort ccode year
save fuelsx, replace

* We combine with the main data; rows only present in the fuel file are dropped
use LACdata, clear
sort ccode year
merge ccode year using fuelsx
tab _m
drop if _m == 2
drop _m
sum fuels_x_ma2, d
codebook fuels_x_ma2
tab country_wb year if fuels_x_ma2 == .

* Fuel X as share of GDP *
* fuel share of merchandise exports times merchandise exports over GDP (%)
gen fuel_sh_x = fuels_x_ma2
gen fuel_sh_gdp = merch_x_gdp/100*fuel_sh_x
label var fuel_sh_gdp "Share of fuel exports in GDP (%) in t"
sum fuel_sh_gdp

* Mining = (mining + fuel) - fuel
corr minfuel_sh_gdp fuel_sh_gdp
sum minfuel_sh_gdp fuel_sh_gdp
gen min_sh_gdp = minfuel_sh_gdp - fuel_sh_gdp
sum min_sh_gdp, d
* Some issues: the difference can be negative.
* We correct the < 0 values by setting them to 0 in a separate variable.
gen min_sh_gdp_noneg = min_sh_gdp
replace min_sh_gdp_noneg = 0 if min_sh_gdp != . & min_sh_gdp < 0
sum min_sh_gdp_noneg
label var fuel_sh_x "Exports of fuels (% of Merch X) (MA2)"
label var min_sh_gdp "Share of mining exports in GDP (%) in t"
label var min_sh_gdp_noneg "Share of mining exports in GDP (%) in t, excl neg obs"
save LACdata, replace
* This is the final data set
tab year
* 116 countries x 13 years from 1960 to 2020

*********************************************************************************************************************************************************
*********************************************************************************************************************************************************

* We now create the figures and tables of the paper.

*****************
*****************
**# FIGURE 5 #1
**# TABLE D3 #2
*****************
*****************

* We first run the regressions to obtain the coefficients. 

***** LONG-DIFFERENCE ANALYSES *****

use LACdata, clear
* Population in 1960, 2010 and 2020, copied to every row of the country
foreach yr in 1960 2010 2020 {
    bysort ccode: egen pop`yr' = max(cond(year == `yr', pop, .))
}
* regression weight: 2020 population, defined on the 2020 rows only
gen pop201020 = pop2020 if year == 2020
gen fireshare = fire2020_ma5_un

* DEINDUSTRIALIZATION *
* manufacturing share in the benchmark years, copied to every row of the country
foreach yr in 1960 1980 1990 2020 {
    bysort country_gjv: egen indu`yr' = max(cond(year == `yr', mfgshare, .))
}
sum indu*
gen chg_19602020 = indu2020 - indu1960
gen chg_19802020 = indu2020 - indu1980
gen chg_19902020 = indu2020 - indu1990
sum chg_19802020 if country_wb == "Brazil" & year == 2020
* -21.2%
* negchg_19802020 is the size of the 1980-2020 decline, and 0 when there is no decline
gen neg = (chg_19802020 < 0)
gen negchg_19802020 = cond(neg == 1, -chg_19802020, 0)
drop indu*

* TYPE OF NRX: country means over all available years *
bysort ccode: egen agrimean = mean(agri_sh_gdp)
bysort ccode: egen minfuelmean = mean(minfuel_sh_gdp)

* LPCGDP in 1960, 2010 and 2020
foreach yr in 1960 2010 2020 {
    bysort ccode: egen lpcgdp`yr' = max(cond(year == `yr', lpcgdp, .))
}

* Share agri and minfuel 1960 *
foreach v in agri_sh_gdp minfuel_sh_gdp {
    bysort ccode: egen `v'1960 = max(cond(year == 1960, `v', .))
}

* Summary statistics of the main regressors (nrxmean is only needed for this)
gen nrxmean = nrxag_mean2020
sum urbrate nrxmean mfgservshare negchg_19802020
drop nrxmean

* Start Table D3 from scratch: remove output left by a previous run
foreach ext in xls tex txt {
    capture erase "tables/tableD3.`ext'"
}

** PANEL A **
* Long-difference regression on the 2020 cross-section, weighted by pop201020,
* with the combined NRX measure (type = ag).
gen nrxmean = nrxag_mean2020
gen nrx1960 = nrxag_sh_gdp1960
xi: reg urbrate nrxmean mfgservshare negchg_19802020 ///
    urbrate1960 nrx1960 mfgserv_ca1960 ///
    larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level ///
    [w=pop201020] if year == 2020, robust beta
outreg2 nrxmean *share negchg_19802020 using "tables/tableD3.xls", ///
    keep(nrxmean *share negchg_19802020) ///
    addtext(type, ag, wgts, 201020, year, 2020, ctrls, yes) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
drop nrxmean nrx1960
* coefficients with 90% CIs and p-values, stored for Figure 5
regsave nrxmean mfgservshare negchg_19802020 using "intermediary/table2/table1_col1_A", ///
    replace ci level(90) pval

** PANEL B **
* Same specification with NRX split into agriculture and mining/fuel means.
xi: reg urbrate agrimean minfuelmean mfgservshare negchg_19802020 ///
    urbrate1960 agri_sh_gdp1960 minfuel_sh_gdp1960 mfgserv_ca1960 ///
    larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level ///
    [w=pop201020] if year == 2020, robust beta
outreg2 *mean *share negchg_19802020 using "tables/tableD3.xls", ///
    keep(*mean *share negchg_19802020) ///
    addtext(type, ag, wgts, 201020, year, 2020, ctrls, yes) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
regsave agrimean minfuelmean using "intermediary/table2/table1_col1_B", ///
    replace ci level(90) pval

*** ROBUSTNESS WITH MFG+FIRE ***
* We create MFG+FIRE (defined only when both components are available)
gen mfgfireshare = mfgshare+fire2020_ma5_un if mfgshare != . & fire2020_ma5_un != .
* remove output left by a previous run
foreach ext in xls tex txt {
    capture erase "tables/tableD3_robfire.`ext'"
}

** PANEL A **
* Same long-difference specification as Table D3, with mfgfireshare in place
* of mfgservshare among the main regressors.
gen nrxmean = nrxag_mean2020
gen nrx1960 = nrxag_sh_gdp1960
xi: reg urbrate nrxmean mfgfireshare negchg_19802020 ///
    urbrate1960 nrx1960 mfgserv_ca1960 ///
    larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level ///
    [w=pop201020] if year == 2020, robust beta
outreg2 nrxmean *share negchg_19802020 using "tables/tableD3_robfire.xls", ///
    keep(nrxmean *share negchg_19802020) ///
    addtext(type, ag, wgts, 201020, year, 2020, ctrls, yes) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
drop nrxmean nrx1960

** PANEL B **
xi: reg urbrate agrimean minfuelmean mfgfireshare negchg_19802020 ///
    urbrate1960 agri_sh_gdp1960 minfuel_sh_gdp1960 mfgserv_ca1960 ///
    larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level ///
    [w=pop201020] if year == 2020, robust beta
outreg2 *mean *share negchg_19802020 using "tables/tableD3_robfire.xls", ///
    keep(*mean *share negchg_19802020) ///
    addtext(type, ag, wgts, 201020, year, 2020, ctrls, yes) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append

***** PANEL ANALYSES *****

** 20 YEARS **

* 20-year panel: keep 1960, 1980, 2000 and 2020 only
use LACdata, clear
* last digit of the year, used to drop the mid-decade (xxx5) observations
gen lastyr = substr(string(year),4,1)
tab lastyr
destring lastyr, replace
drop if lastyr == 5
keep if inrange(year, 1960, 2020)
keep if inlist(year, 1960, 1980, 2000, 2020)

* population in 1960 and 2020, copied to every row of the country
foreach yr in 1960 2020 {
    bysort ccode: egen pop`yr' = max(cond(year == `yr', pop, .))
}
gen popt = pop

* one-period lags (previous retained year of the same country)
foreach v in nrxag_sh_gdp mfgservshare agri_sh_gdp minfuel_sh_gdp urbrate {
    bysort ccode (year): gen lag1`v' = `v'[_n-1]
}

* Deindustrialization variables *
* manufacturing share in 1980, copied to every row of the country
bysort country_gjv: egen indu1980 = max(cond(year == 1980, mfgshare, .))
gen indu = mfgshare
gen chg_1980t = indu - indu1980
* negchg_1980t: size of the decline since 1980; 0 if no decline or before 1980
gen neg = (chg_1980t < 0)
gen negchg_1980t = cond(neg == 1 & year >= 1980, -chg_1980t, 0)
sum negchg_1980t, d
sum negchg_1980t if country_wb == "Brazil"
* 21%
bysort year: sum negchg_1980t
sum mfgservshare negchg_1980t
corr mfgservshare negchg_1980t [w=pop2020]
* -0.47

* period-on-period fall in the manufacturing share, floored at 0 and set to 0
* up to 1980
bysort ccode (year): gen deindu = mfgshare[_n-1] - mfgshare
bysort year: sum deindu [w=pop]
replace deindu = 0 if deindu < 0
bysort year: sum deindu [w=pop], d
replace deindu = 0 if year <= 1980
bysort year: sum deindu [w=pop], d

** PANEL A **
* Country fixed effects (absorbed) and year dummies, weighted by 2020
* population, standard errors clustered by country.
xi: areg urbrate lag1nrxag_sh_gdp mfgservshare negchg_1980t lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
outreg2 lag1nrx* mfg_sh_x using "tables/tableD3.xls", ///
    keep(lag1nrx* *mfgservshare negchg_1980t) addtext(wgts, 2020) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
* coefficients with 90% CIs and p-values, stored for Figure 5
regsave lag1nrxag_sh_gdp mfgservshare negchg_1980t using "intermediary/table2/table1_col2_A", ///
    replace ci level(90) pval

** PANEL B **
* Same specification with NRX split into mining/fuel and agriculture.
xi: areg urbrate lag1minfuel_sh_gdp lag1agri_sh_gdp mfgservshare negchg_1980t lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
outreg2 lag1nrx* mfg_sh_x using "tables/tableD3.xls", ///
    keep(lag1* *mfgservshare negchg_1980t) addtext(wgts, 2020) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
regsave lag1minfuel_sh_gdp lag1agri_sh_gdp using "intermediary/table2/table1_col2_B", ///
    replace ci level(90) pval
*** ADDING A LAG OF URBANIZATION ***
* These are the first columns of the lagged-urbanization table. They are
* written to "tables/tableD3_lagurban.xls", the file the 10-year and 5-year
* blocks append to, and the first call uses -replace- so that every run starts
* from an empty table. (Previously this block wrote to a differently named
* file, tableD3_lagurb.xls, so the table was split over two files and the
* lagurban file kept accumulating columns across runs.)
** PANEL A **
xi: areg urbrate lag1urbrate lag1nrxag_sh_gdp mfgservshare negchg_1980t lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
outreg2 lag1nrx* mfg_sh_x using "tables/tableD3_lagurban.xls", ///
    keep(lag1urbrate lag1nrx* *mfgservshare negchg_1980t) addtext(wgts, 2020) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes replace

** PANEL B **
* Same specification with NRX split into mining/fuel and agriculture.
xi: areg urbrate lag1urbrate lag1minfuel_sh_gdp lag1agri_sh_gdp mfgservshare negchg_1980t lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
outreg2 lag1nrx* mfg_sh_x using "tables/tableD3_lagurban.xls", ///
    keep(lag1urbrate lag1* *mfgservshare negchg_1980t) addtext(wgts, 2020) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append

** 10 YEARS **

* 10-year panel: decade years from 1960 to 2020
use LACdata, clear
* last digit of the year, used to drop the mid-decade (xxx5) observations
gen lastyr = substr(string(year),4,1)
tab lastyr
destring lastyr, replace
drop if lastyr == 5
keep if inrange(year, 1960, 2020)

* population in 1960 and 2020, copied to every row of the country
foreach yr in 1960 2020 {
    bysort ccode: egen pop`yr' = max(cond(year == `yr', pop, .))
}
gen popt = pop

* one-period lags (previous retained year of the same country)
foreach v in nrxag_sh_gdp mfgservshare agri_sh_gdp minfuel_sh_gdp urbrate {
    bysort ccode (year): gen lag1`v' = `v'[_n-1]
}

* Deindustrialization variables *
* manufacturing share in 1980, copied to every row of the country
bysort country_gjv: egen indu1980 = max(cond(year == 1980, mfgshare, .))
gen indu = mfgshare
gen chg_1980t = indu - indu1980
* negchg_1980t: size of the decline since 1980; 0 if no decline or before 1980
gen neg = (chg_1980t < 0)
gen negchg_1980t = cond(neg == 1 & year >= 1980, -chg_1980t, 0)
sum negchg_1980t, d
bysort year: sum negchg_1980t
sum mfgservshare negchg_1980t
corr mfgservshare negchg_1980t [w=pop2020]
* -0.46
sum negchg_1980t if country_wb == "Brazil"
* 21%

* check: observations with a mining/fuel lag but no combined NRX lag
tab ccode year if lag1nrxag_sh_gdp == . & lag1minfuel_sh_gdp != .

** PANEL A **
* Country fixed effects (absorbed) and year dummies, weighted by 2020
* population, standard errors clustered by country.
xi: areg urbrate lag1nrxag_sh_gdp mfgservshare negchg_1980t lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
outreg2 lag1nrx* mfg_sh_x using "tables/tableD3.xls", ///
    keep(lag1nrx* *mfgservshare negchg_1980t) addtext(wgts, 2020) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
* coefficients with 90% CIs and p-values, stored for Figure 5
regsave lag1nrxag_sh_gdp mfgservshare negchg_1980t using "intermediary/table2/table1_col3_A", ///
    pval replace ci level(90)

** PANEL B **
* Same specification with NRX split into mining/fuel and agriculture.
xi: areg urbrate lag1minfuel_sh_gdp lag1agri_sh_gdp mfgservshare negchg_1980t lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
outreg2 lag1nrx* mfg_sh_x using "tables/tableD3.xls", ///
    keep(lag1* *mfgservshare negchg_1980t) addtext(wgts, 2020) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
regsave lag1minfuel_sh_gdp lag1agri_sh_gdp using "intermediary/table2/table1_col3_B", ///
    pval replace ci level(90)

***** ADDING A LAG OF URBANIZATION *****
* Both panels are appended to tables/tableD3_lagurban.xls.
** PANEL A **
xi: areg urbrate lag1urbrate lag1nrxag_sh_gdp mfgservshare negchg_1980t lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
outreg2 lag1nrx* mfg_sh_x using "tables/tableD3_lagurban.xls", ///
    keep(lag1urbrate lag1nrx* *mfgservshare negchg_1980t) addtext(wgts, 2020) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append

** PANEL B **
xi: areg urbrate lag1urbrate lag1minfuel_sh_gdp lag1agri_sh_gdp mfgservshare negchg_1980t lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
outreg2 lag1nrx* mfg_sh_x using "tables/tableD3_lagurban.xls", ///
    keep(lag1urbrate lag1* *mfgservshare negchg_1980t) addtext(wgts, 2020) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append

** 5 YEARS **

* 5-year panel: every observation from 1960 to 2020
use LACdata, clear
keep if inrange(year, 1960, 2020)

* population in 1960 and 2020, copied to every row of the country
foreach yr in 1960 2020 {
    bysort ccode: egen pop`yr' = max(cond(year == `yr', pop, .))
}
gen popt = pop

* one-period lags (previous retained year of the same country)
foreach v in nrxag_sh_gdp mfgservshare agri_sh_gdp minfuel_sh_gdp urbrate {
    bysort ccode (year): gen lag1`v' = `v'[_n-1]
}

* Deindustrialization variables *
* manufacturing share in 1980, copied to every row of the country
bysort country_gjv: egen indu1980 = max(cond(year == 1980, mfgshare, .))
gen indu = mfgshare
gen chg_1980t = indu - indu1980
* negchg_1980t: size of the decline since 1980; 0 if no decline or before 1980
gen neg = (chg_1980t < 0)
gen negchg_1980t = cond(neg == 1 & year >= 1980, -chg_1980t, 0)
sum negchg_1980t, d
bysort year: sum negchg_1980t
sum mfgservshare negchg_1980t

** PANEL A **
* Country fixed effects (absorbed) and year dummies, weighted by 2020
* population, standard errors clustered by country.
xi: areg urbrate lag1nrxag_sh_gdp mfgservshare negchg_1980t lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
outreg2 lag1nrx* mfg_sh_x using "tables/tableD3.xls", ///
    keep(lag1nrx* *mfgservshare negchg_1980t) addtext(wgts, 2020) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
* coefficients with 90% CIs and p-values, stored for Figure 5
regsave lag1nrxag_sh_gdp mfgservshare negchg_1980t using "intermediary/table2/table1_col4_A", ///
    replace pval ci level(90)

** PANEL B **
* Same specification with NRX split into mining/fuel and agriculture.
xi: areg urbrate lag1minfuel_sh_gdp lag1agri_sh_gdp mfgservshare negchg_1980t lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
outreg2 lag1nrx* mfg_sh_x using "tables/tableD3.xls", ///
    keep(lag1* *mfgservshare negchg_1980t) addtext(wgts, 2020) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
regsave lag1minfuel_sh_gdp lag1agri_sh_gdp using "intermediary/table2/table1_col4_B", ///
    replace pval ci level(90)

*** WITH LAG OF URBAN ***
* Both panels are appended to tables/tableD3_lagurban.xls.
** PANEL A **
xi: areg urbrate lag1urbrate lag1nrxag_sh_gdp mfgservshare negchg_1980t lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
outreg2 lag1nrx* mfg_sh_x using "tables/tableD3_lagurban.xls", ///
    keep(lag1urbrate lag1nrx* *mfgservshare negchg_1980t) addtext(wgts, 2020) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append

** PANEL B **
xi: areg urbrate lag1urbrate lag1minfuel_sh_gdp lag1agri_sh_gdp mfgservshare negchg_1980t lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
outreg2 lag1nrx* mfg_sh_x using "tables/tableD3_lagurban.xls", ///
    keep(lag1urbrate lag1* *mfgservshare negchg_1980t) addtext(wgts, 2020) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append


***** WE NOW CREATE FIGURE 5 *****

* Preparing data * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

* Load each saved coefficient file (columns 1-4, panels A and B), tag it with
* its source name and keep a temporary copy
forvalues col = 1/4 {
    foreach pan in A B {
        use "intermediary/table2/table1_col`col'_`pan'.dta", clear
        gen filename = "table1_col`col'_`pan'"
        tempfile table1_col`col'_`pan'
        save `table1_col`col'_`pan'', replace
    }
}

* NB: clear all also drops the programs defined at the top of the file
clear all

* Stack all temporary copies into one data set
forvalues col = 1/4 {
    foreach pan in A B {
        append using `table1_col`col'_`pan''
    }
}

* Assigning stars to each coefficient: * p<.10, ** p<.05, *** p<.01
gen stars = cond(pval < .01, "***", cond(pval < .05, "**", cond(pval < .10, "*", "")))

* Column number and panel letter are characters 11 and 13 of the file name
gen column = substr(filename, 11, 1)
gen panel  = substr(filename, 13, 1)

* Rename Columns numbers by their respective name in the paper
replace column = "Panel A: Long-Differences 1960-2020" if column == "1"
replace column = "Panel B: 20-Year Periods"            if column == "2"
replace column = "Panel C: 10-Year Periods"            if column == "3"
replace column = "Panel D: 5-Year Periods"             if column == "4"

* Creating Figures * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

* Row position of each coefficient in the plots; every variable not listed
* below stays in row 1
gen rownum = 1
replace rownum = 2 if var == "mfgservshare"
replace rownum = 3 if inlist(var, "negchg_19802020", "negchg_1980t")
replace rownum = 4 if inlist(var, "minfuelmean", "lag1minfuel_sh_gdp")
replace rownum = 5 if inlist(var, "agrimean", "lag1agri_sh_gdp")


* Plot for long-differences * * * * * 

* Long-difference panel of Figure 5, saved as g1.gph for combining later.
* Elements of the graph, in the order of the command below:
*   - rcap:    horizontal confidence intervals (ci_lower, ci_upper) by row
*   - scatter: point estimates (blue diamonds) with significance stars below
*   - ylabel:  names of the variables on the Y axis
*   - xline:   vertical line at 0
*   - yline:   horizontal line at 3.5, separating rows 1-3 from rows 4-5
*   - yscale(reverse): row 1 is drawn at the top
preserve

keep if column == "Panel A: Long-Differences 1960-2020"

twoway ///
    (rcap ci_lower ci_upper rownum, lcolor(black) horizontal ///
        by(column, xrescale legend(off) note("") colfirst )) ///
    (scatter rownum coef, by(column) mcolor(blue) msymbol(D) msize(medium) ///
        mlabel(stars) mlabcolor(blue) mlabposition(6) ///
        mlabgap(*.5) mlabsize(medium)) ///
    , ///
    ylabel(1 "NRXGDP (%) 2020" ///
           2 "MFGSERV (%) 2020" ///
           3 "DEINDU (%) 1980-2020" ///
           4 "FMXGDP (%) 2020" ///
           5 "AGXGDP (%) 2020" ///
           , angle(0) nogrid) ///
    xline(0, lpattern(longdash) lwidth(medthin) lcolor(red)) ///
    yline(3.5, lpattern(shortdash) lcolor(gs5)) ///
    subtitle(,bcolor(white) lcol(black)) ///
    plotregion(margin(b=+2)) ///
    ytitle("") ///
    yscale(reverse) ///
    saving(g1, replace)

restore

* Plots for panel data results * * * * *

* Panel-data panels of Figure 5 (all columns except the long-difference one),
* drawn side by side in one row and saved as g2.gph for combining later.
* Elements of the graph, in the order of the command below:
*   - rcap:    horizontal confidence intervals (ci_lower, ci_upper) by row
*   - scatter: point estimates (blue diamonds) with significance stars below
*   - ylabel:  names of the variables on the Y axis
*   - xline:   vertical line at 0
*   - yline:   horizontal line at 3.5, separating rows 1-3 from rows 4-5
*   - yscale(reverse): row 1 is drawn at the top
preserve

drop if column == "Panel A: Long-Differences 1960-2020"

twoway ///
    (rcap ci_lower ci_upper rownum, lcolor(black) horizontal ///
        by(column, row(1) xrescale legend(off) note("") colfirst)) ///
    (scatter rownum coef, by(column) mcolor(blue) msymbol(D) msize(medium) ///
        mlabel(stars) mlabcolor(blue) mlabposition(6) ///
        mlabgap(*.5) mlabsize(medium)) ///
    , ///
    ylabel(1 "NRXGDP (%) t-1" ///
           2 "MFGSERV (%) t" ///
           3 "Deindu (%) 1980-t" ///
           4 "FMXGDP (%) t-1" ///
           5 "AGXGDP (%) t-1" ///
           , angle(0) noticks nogrid) ///
    xline(0, lpattern(longdash) lwidth(medium) lcolor(red)) ///
    yline(3.5, lpattern(shortdash) lcolor(gs5)) ///
    subtitle(,bcolor(white) lcol(black)) ///
    plotregion(margin(b=+2)) ///
    ytitle("") ///
    yscale(reverse) ///
    saving(g2, replace)

restore

* Final Figure * * * * *

* Stack the two saved plots (long-difference on top, panels below) with a
* common x axis, and export the result under both file names
gr combine g1.gph g2.gph, col(1) xcommon
*graph export "output/table2new.png", replace width(2620) height(1908)
foreach out in table2new figure5 {
    graph export "Figures/`out'.png", replace width(3000) height(1908)
}

* Erase 'temporary' graphs
foreach g in g1 g2 {
    erase "`g'.gph"
}

*****************
*****************
**# TABLE D4 #4
*****************
*****************

program drop _all
* proglincom: run -lincom- on the expression passed as argument and return
* the formatted results for reporting in a table.
*   Argument: anything accepted by -lincom- (passed through unchanged).
*   Returns:  r(coefftxt)  estimate formatted %9.2f followed by stars
*                          (*** p<1%, ** p<5%, * p<10%, two-sided t test
*                          with r(df) degrees of freedom)
*             r(setxt)     standard error formatted %9.2f, in brackets
* The p-value and the stars are computed with temporary scalars, so the
* program neither creates variables or value labels in the data set nor fails
* when variables named pval, pvalcat or stars already exist.
program proglincom, rclass
    lincom `0'
    * copy the -lincom- results before anything else can overwrite r()
    tempname est se df pval
    scalar `est' = r(estimate)
    scalar `se'  = r(se)
    scalar `df'  = r(df)
    * two-sided p-value in percent (missing if it cannot be computed)
    scalar `pval' = tprob(`df', abs(`est'/`se'))*100
    * a missing p-value is not < 10, so it gets no stars
    local startxt ""
    if `pval' < 10 local startxt "*"
    if `pval' < 5  local startxt "**"
    if `pval' < 1  local startxt "***"
    return local setxt = "[" + string(`se',"%9.2f") + "]"
    return local coefftxt = string(`est',"%9.2f") + "`startxt'"
end
* Data for Table D4: decade years from 1960 to 2020
use LACdata, clear
* last digit of the year, used to drop the mid-decade (xxx5) observations
gen lastyr = substr(string(year),4,1)
tab lastyr
destring lastyr, replace
drop if lastyr == 5
keep if inrange(year, 1960, 2020)

* population in 1960 and 2020, copied to every row of the country
foreach yr in 1960 2020 {
    bysort ccode: egen pop`yr' = max(cond(year == `yr', pop, .))
}
gen popt = pop

* Deindustrialization variables *
* manufacturing share in 1980, copied to every row of the country
bysort country_gjv: egen indu1980 = max(cond(year == 1980, mfgshare, .))
gen indu = mfgshare
gen chg_1980t = indu - indu1980
sum chg_1980t
sum chg_1980t if country_wb == "Brazil"
* negchg_1980t: size of the decline since 1980; 0 if no decline or before 1980
gen neg = (chg_1980t < 0)
gen negchg_1980t = cond(neg == 1 & year >= 1980, -chg_1980t, 0)

* We create leads and lags of the main variables
* (lag k = value k retained periods earlier within the country)
forvalues k = 1/4 {
    foreach v in nrxag_sh_gdp mfgservshare negchg_1980t {
        bysort ccode (year): gen lag`k'`v' = `v'[_n-`k']
    }
}
* one-period leads
foreach v in nrxag_sh_gdp mfgservshare negchg_1980t {
    bysort ccode (year): gen lead1`v' = `v'[_n+1]
}
* lags of the two NRX components
forvalues k = 1/4 {
    foreach v in agri_sh_gdp minfuel_sh_gdp {
        bysort ccode (year): gen lag`k'`v' = `v'[_n-`k']
    }
}

* We run the regressions
* All columns: country fixed effects (absorbed), year dummies, 2020 population
* weights, standard errors clustered by country.
* remove output left by a previous run
foreach ext in xls tex txt {
    capture erase "tables/tableD4.`ext'"
}

* 1: baseline
xi: areg urbrate lag1nrxag_sh_gdp mfgservshare negchg_1980t lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
outreg2 lag1nrx* mfg_sh_x using "tables/tableD4.xls", ///
    keep(lag1nrx* *mfgservshare negchg_1980t) addtext(wgts, 2020) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append

* 2: adding one-period leads (current NRX, lead1 of the two others)
xi: areg urbrate lag1nrxag_sh_gdp mfgservshare negchg_1980t ///
    nrxag_sh_gdp lead1mfgservshare lead1negchg_1980t lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
outreg2 lag1nrx* mfg_sh_x using "tables/tableD4.xls", ///
    keep(*nrx* *mfgservshare *negchg_1980t) addtext(wgts, 2020) ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append

* 3: adding one extra lag; the sums of the coefficients are reported as text
local lincomset0 "lag1nrxag_sh_gdp + lag2nrxag_sh_gdp"
local lincomset1 "mfgservshare + lag1mfgservshare"
local lincomset2 "negchg_1980t + lag1negchg_1980t"
xi: areg urbrate lag1nrxag_sh_gdp mfgservshare negchg_1980t ///
    lag2nrxag_sh_gdp lag1mfgservshare lag1negchg_1980t lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
proglincom "`lincomset0'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset1'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
proglincom "`lincomset2'"
local coeff3 = r(coefftxt)
local se3 = r(setxt)
outreg2 * using "tables/tableD4.xls", ///
    keep(*nrx* *mfgservshare* *negchg_1980t) ///
    addtext(nrxcoef, "`coeff1'", nrxse, "`se1'", mfgservcoef, "`coeff2'", mfgservse, "`se2'", deinducoef, "`coeff3'", deinduse, "`se3'") ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append

* 4: adding two extra lags
local lincomset0 "lag1nrxag_sh_gdp + lag2nrxag_sh_gdp + lag3nrxag_sh_gdp"
local lincomset1 "mfgservshare + lag1mfgservshare + lag2mfgservshare"
local lincomset2 "negchg_1980t + lag1negchg_1980t + lag2negchg_1980t"
xi: areg urbrate lag1nrxag_sh_gdp mfgservshare negchg_1980t ///
    lag2nrxag_sh_gdp lag1mfgservshare lag1negchg_1980t ///
    lag3nrxag_sh_gdp lag2mfgservshare lag2negchg_1980t lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
proglincom "`lincomset0'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset1'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
proglincom "`lincomset2'"
local coeff3 = r(coefftxt)
local se3 = r(setxt)
outreg2 * using "tables/tableD4.xls", ///
    keep(*nrx* *mfgservshare* *negchg_1980t) ///
    addtext(nrxcoef, "`coeff1'", nrxse, "`se1'", mfgservcoef, "`coeff2'", mfgservse, "`se2'", deinducoef, "`coeff3'", deinduse, "`se3'") ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append

* 5: as 4, with NRX split into agriculture and mining/fuel
local lincomset3 "lag1agri_sh_gdp + lag2agri_sh_gdp + lag3agri_sh_gdp"
local lincomset4 "lag1minfuel_sh_gdp + lag2minfuel_sh_gdp + lag3minfuel_sh_gdp"
local lincomset1 "mfgservshare + lag1mfgservshare + lag2mfgservshare"
local lincomset2 "negchg_1980t + lag1negchg_1980t + lag2negchg_1980t"
xi: areg urbrate mfgservshare negchg_1980t lag1mfgservshare lag1negchg_1980t ///
    lag2mfgservshare lag2negchg_1980t ///
    lag1agri_sh_gdp lag2agri_sh_gdp lag3agri_sh_gdp ///
    lag1minfuel_sh_gdp lag2minfuel_sh_gdp lag3minfuel_sh_gdp lpop lpop_sq i.year ///
    [w=pop2020] if year <= 2020, robust absorb(ccode) clust(ccode)
proglincom "`lincomset3'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset1'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
proglincom "`lincomset2'"
local coeff3 = r(coefftxt)
local se3 = r(setxt)
proglincom "`lincomset4'"
local coeff4 = r(coefftxt)
local se4 = r(setxt)
outreg2 * using "tables/tableD4.xls", ///
    keep(*nrx* *agri* *minfuel* *mfgservshare* *negchg_1980t) ///
    addtext(agxcoef, "`coeff1'", agxse, "`se1'", mfgservcoef, "`coeff2'", mfgservse, "`se2'", deinducoef, "`coeff3'", deinduse, "`se3'", fmxcoef, "`coeff4'", fmxse, "`se4'") ///
    se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append

*****************
*****************
**# TABLE D2 #5
*****************
*****************

* Data preparation for Table D2, starting from the country-year panel.
use LACdata, clear

* Country-level population in selected years, copied to every row of the country
foreach X in 1960 2010 2020 {
    gen pop_`X' = pop if year == `X'
    bysort ccode: egen pop`X' = max(pop_`X')
    drop pop_`X'
}
* Weight variable: 2020 population, defined on the 2020 rows only
gen pop201020 = pop2020 if year == 2020
gen fireshare = fire2020_ma5_un

* DEINDUSTRIALIZATION *
* Manufacturing share in benchmark years (grouped by country_gjv, not ccode)
foreach X in 1960 1980 1990 2020 {
    gen indu_`X' = mfgshare if year == `X'
    bysort country_gjv: egen indu`X' = max(indu_`X')
    drop indu_`X'
}
sum indu*
gen chg_19602020 = indu2020 - indu1960
gen chg_19802020 = indu2020 - indu1980
gen chg_19902020 = indu2020 - indu1990
* negchg_19802020 is the size of the 1980-2020 decline, and 0 when there is no decline
gen neg = (chg_19802020 < 0)
gen negchg_19802020 = cond(neg == 1, -chg_19802020, 0)
sum negchg_19802020 if country_wb == "Brazil"
* Correlations between the alternative change windows (all countries, then LAC only)
corr chg_19802020 chg_19902020 [w=pop] if year == 2020
corr chg_19802020 chg_19902020 [w=pop] if LAC == 1 & year == 2020
corr chg_19802020 chg_19602020 [w=pop] if year == 2020
corr chg_19802020 chg_19602020 [w=pop] if LAC == 1 & year == 2020
*twoway (scatter chg_19902020 chg_19802020 if year == 2020 & LAC != 1)(scatter chg_19902020 chg_19802020 if year == 2020 & LAC == 1, mlabel(ccode) mcolor(blue) mlabcolor(blue))(lfit chg_19902020 chg_19802020 if year == 2020)
*twoway (scatter chg_19902020 chg_19802020 if year == 2020 & LAC != 1, mlabel(ccode))(scatter chg_19902020 chg_19802020 if year == 2020 & LAC == 1, mcolor(blue))(lfit chg_19902020 chg_19802020 if year == 2020)
sum chg_19802020 chg_19902020 [w=pop] if LAC == 1 & year == 2020
drop indu*

* TYPE OF NRX *
* Within-country means of each resource GDP share
bysort ccode: egen agrimean = mean(agri_sh_gdp)
bysort ccode: egen minfuelmean = mean(minfuel_sh_gdp)
bysort ccode: egen fuelmean = mean(fuel_sh_gdp)
bysort ccode: egen minmean = mean(min_sh_gdp)
bysort ccode: egen minnonegmean = mean(min_sh_gdp_noneg)
corr fuelmean minfuelmean [w=pop] if year == 2020
* 0.92
corr minmean minfuelmean [w=pop] if year == 2020
* 0.31
corr minnonegmean minfuelmean [w=pop] if year == 2020
* 0.33

* LPCGDP in selected years, copied to every row of the country
foreach X in 1960 2010 2020 {
    gen lpcgdp_`X' = lpcgdp if year == `X'
    bysort ccode: egen lpcgdp`X' = max(lpcgdp_`X')
    drop lpcgdp_`X'
}

* Resource GDP shares in 1960 *
foreach V in agri_sh_gdp minfuel_sh_gdp fuel_sh_gdp min_sh_gdp min_sh_gdp_noneg {
    foreach X in 1960 {
        gen `V'`X'2 = `V' if year == `X'
        bysort ccode: egen `V'`X' = max(`V'`X'2)
        drop `V'`X'2
    }
}

* Service share net of the FIRE share
gen nonfireshare = servshare-fireshare

* Table D2 regressions. Old output files are removed first because every
* outreg2 call below appends to tables/tableD2.xls.
capture erase "tables/tableD2.xls"
capture erase "tables/tableD2.tex"
capture erase "tables/tableD2.txt"

* BASELINE WITH DEINDUSTRIALIZATION
foreach X in ag {
    foreach Z in 201020 {
        foreach Y in 2020 {
            gen nrxmean = nrx`X'_mean`Y'
            gen nrx1960 = nrx`X'_sh_gdp1960
            xi: reg urbrate nrxmean mfgservshare negchg_19802020 ///
                urbrate1960 nrx1960 mfgserv_ca1960 ///
                larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level ///
                [w=pop`Z'] if year == `Y', robust beta
            outreg2 nrxmean *share negchg_19802020 using "tables/tableD2.xls", ///
                keep(nrxmean *share negchg_19802020) ///
                addtext(type, `X', wgts, `Z', year, 2020, ctrls, yes) ///
                se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
            drop nrxmean nrx1960
        }
    }
}

* MFG AND SERV SEPARATE
foreach X in ag {
    foreach Z in 201020 {
        foreach Y in 2020 {
            gen nrxmean = nrx`X'_mean`Y'
            gen nrx1960 = nrx`X'_sh_gdp1960
            xi: reg urbrate nrxmean mfgshare servshare negchg_19802020 ///
                urbrate1960 nrx1960 mfg_ca1960 serv_ca1960 ///
                larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level ///
                [w=pop`Z'] if year == `Y', robust beta
            outreg2 nrxmean *share negchg_19802020 using "tables/tableD2.xls", ///
                keep(nrxmean *share negchg_19802020) ///
                addtext(type, `X', wgts, `Z', year, 2020, ctrls, yes) ///
                se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
            drop nrxmean nrx1960
        }
    }
}

* MFG AND FIRE SEPARATE, ALSO NON-FIRE
foreach X in ag {
    foreach Z in 201020 {
        foreach Y in 2020 {
            gen nrxmean = nrx`X'_mean`Y'
            gen nrx1960 = nrx`X'_sh_gdp1960
            xi: reg urbrate nrxmean mfgshare fireshare nonfireshare negchg_19802020 ///
                urbrate1960 nrx1960 mfg_ca1960 serv_ca1960 ///
                larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level ///
                [w=pop`Z'] if year == `Y', robust beta
            outreg2 nrxmean *share negchg_19802020 using "tables/tableD2.xls", ///
                keep(nrxmean *share negchg_19802020) ///
                addtext(type, `X', wgts, `Z', year, 2020, ctrls, yes) ///
                se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
            drop nrxmean nrx1960
        }
    }
}

*****************
*****************
**# TABLE D5 #5
**# FIGURE 6 #6
*****************
*****************

***** SECTORAL RESULTS *****

* Rebuild the country-level constants used by the sectoral regressions.
use LACdata, clear

** MFGSERV1960: 1960 manufacturing+services share on every row of the country **
foreach X in 1960 {
    gen mfgservshare_`X' = mfgservshare if year == `X'
    bysort ccode: egen mfgservshare`X' = max(mfgservshare_`X')
    drop mfgservshare_`X'
}

** POP: population and urban population in benchmark years **
foreach X in 1960 2000 2010 2020 {
    gen pop_`X' = pop if year == `X'
    bysort ccode: egen pop`X' = max(pop_`X')
    drop pop_`X'
}
foreach X in 2020 {
    gen upop_`X' = upop if year == `X'
    bysort ccode: egen upop`X' = max(upop_`X')
    drop upop_`X'
}
* pop1 is a constant; pop201020 takes the 2010 value on 2010 rows and the 2020 value on 2020 rows
gen pop1 = 1
gen pop201020 = pop2010 if year == 2010
replace pop201020 = pop2020 if year == 2020
sum pop201020 pop1960 pop1

** LPCGDP in benchmark years **
foreach X in 1960 2000 2010 2020 {
    gen lpcgdp_`X' = lpcgdp if year == `X'
    bysort ccode: egen lpcgdp`X' = max(lpcgdp_`X')
    drop lpcgdp_`X'
}

** DEINDUSTRIALIZATION **
* Manufacturing share in 1980, 2000 and 2020 (grouped by country_gjv)
foreach X in 1980 2000 2020 {
    gen indu_`X' = mfgshare if year == `X'
    bysort country_gjv: egen indu`X' = max(indu_`X')
    drop indu_`X'
}
* negchg_1980YYYY: size of the decline since 1980, 0 where there is no decline
foreach X in 2000 2020 {
    gen chg_1980`X' = indu`X' - indu1980
    gen neg = (chg_1980`X' < 0)
    gen negchg_1980`X' = 0
    replace negchg_1980`X' = chg_1980`X' if neg == 1
    replace negchg_1980`X' = -negchg_1980`X'
    sum negchg_1980`X', d
    drop neg
}
** DISTRIBUTION SECTORS **
* Builds the aggregated sectoral shares (FIRE, MFG+FIRE, GOVT2, NRX, TRADE variants)
* and saves the year-2000 sample to temp for the regressions that follow.
desc ipuI_*_t
codebook ipuI_*_t if year == 2020
** WE KEEP THE 60 OBS AND YEAR == 2000
keep if ipuI_mfg_t != . & year == 2000
sort ccode year
codebook ipuI_*_t

** FIRE **
* What to do with the business missing.
* (this tab was issued twice in a row; one copy is enough)
tab ccode if ipuI_finance_t != . & ipuI_buss_service_t == .
foreach G in t {
    * rsum treats a missing component as 0, so the if-condition keeps the total
    * missing only when every component is missing
    egen ipuI_fire_`G' = rsum(ipuI_finance_`G' ipuI_buss_service_`G') if ipuI_finance_`G' != . | ipuI_buss_service_`G' != .
    codebook ipuI_fire_`G'
    egen ipuI_mfgfire_`G' = rsum(ipuI_mfg_`G' ipuI_fire_`G') if ipuI_mfg_`G' != . & ipuI_fire_`G' != .
    codebook ipuI_mfgfire_`G'
}
* The sample now only holds year 2000, so the former "if year == 2020" filter
* always selected zero observations; summarise the retained sample instead.
sum ipuI_fire_t, d

** GOVT **
* NOTE: after the keep above ipuI_mfg_t is never missing, so this check lists nothing
tab ccode if ipuI_govmt_t != . & ipuI_mfg_t == .
*sum ipuI_govmt_t if year == 2020, d

** GOVT2 **
foreach G in t {
    * uses the loop suffix throughout (was hard-coded to _t for the govmt term)
    egen ipuI_govmt2_`G' = rsum(ipuI_govmt_`G' ipuI_educ_`G' ipuI_health_`G') if ipuI_govmt_`G' != . | ipuI_educ_`G' != . | ipuI_health_`G' !=.
}

** NRX **
foreach G in t {
    egen ipuI_nrx_`G' = rsum(ipuI_agri_`G' ipuI_mining_`G') if ipuI_agri_`G' != . | ipuI_mining_`G' != .
}

** TRADE2 **
* Correlation of the trade share with every sector in the positional range
foreach X of varlist ipuI_agri_t-ipuI_other_industry_t {
    corr ipuI_trade_t `X'
}
desc ipuI_agri_t-ipuI_other_industry_t, f
foreach G in t {
    egen ipuI_tradehh_`G' = rsum(ipuI_trade_`G' ipuI_household_serv_`G') if ipuI_trade_`G' != . | ipuI_household_serv_`G' != .
    egen ipuI_trade2_`G' = rsum(ipuI_trade_`G' ipuI_other_services_`G') if ipuI_trade_`G' != . | ipuI_other_services_`G' != .
    egen ipuI_trade3_`G' = rsum(ipuI_trade_`G' ipuI_other_services_`G' ipuI_unspec_service_`G') if ipuI_trade_`G' != . | ipuI_other_services_`G' != . | ipuI_unspec_service_`G' != .
    egen ipuI_trade2hh_`G' = rsum(ipuI_trade_`G' ipuI_other_services_`G' ipuI_household_serv_`G') if ipuI_trade_`G' != . | ipuI_other_services_`G' != . | ipuI_household_serv_`G' != .
    egen ipuI_trade3hh_`G' = rsum(ipuI_trade_`G' ipuI_other_services_`G' ipuI_unspec_service_`G' ipuI_household_serv_`G') if ipuI_trade_`G' != . | ipuI_other_services_`G' != . | ipuI_unspec_service_`G' != . | ipuI_household_serv_`G' != .
}

** FORMAL WITHIN SECTORS **
* mfgsh is the weight of MFG within MFG+FIRE; the worker-type shares of the
* combined sector are the weighted average of the mfg and fi shares
gen mfgsh = ipuI_mfg_t/(ipuI_mfg_t+ipuI_fire_t)
sum mfgsh
foreach X in wagewker selfwker unpaidwker unpothwker selfwkerboss selfwkerown {
    gen ipuFI_`X'_mfgfire_t = mfgsh*ipuFI_`X'_mfg_t+(1-mfgsh)*ipuFI_`X'_fi_t
}
sum ipuI_fire_t ipuI_mfg_t
* We create MFG+FIRE
gen mfgfireshare = mfgshare+fire2020_ma5_un if mfgshare != . & fire2020_ma5_un != . 
save temp, replace 

*** TABLE D5 ***
* For each sectoral share, regress it on nrxmean, mfgservshare and negchg_19802000
* (year 2000, weighted by pop2000), store the three coefficients with regsave and
* append the regression plus the two coefficient differences to tables/tableD5.xls.
foreach G in t {
    capture erase "tables/tableD5.xls"
    capture erase "tables/tableD5.tex"
    capture erase "tables/tableD5.txt"
    local lincomset0 "mfgservshare - nrxmean"
    local lincomset1 "mfgservshare - negchg_19802000"
    foreach V of varlist ipuI_mfg_`G' ipuI_fire_`G' ipuI_mfgfire_`G' ipuI_trade_`G' ipuI_trade2_`G' ipuI_trade2hh_`G' ipuI_govmt_`G' ipuI_govmt2_`G' ipuI_nrx_`G' ipuI_construct_`G' {
        foreach Y in 2000 {
            foreach X in ag {
                gen nrxmean = nrx`X'_mean`Y'
                * Urbrate ctrl only *
                xi: reg `V' nrxmean mfgservshare negchg_19802000 urbrate ///
                    larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level ///
                    [w=pop`Y'] if year == `Y', robust
                regsave nrxmean mfgservshare negchg_19802000 using "table2_`V'", ci level(90) replace pval
                proglincom "`lincomset0'"
                local coeff1 = r(coefftxt)
                local se1 = r(setxt)
                proglincom "`lincomset1'"
                local coeff2 = r(coefftxt)
                local se2 = r(setxt)
                * Only the two differences computed above are reported. The former
                * diffcoef and diffse rows printed the coeff3 and se3 macros, which are
                * never set in this section and so carried values from an earlier table.
                outreg2 * using "tables/tableD5.xls", ///
                    keep(*mfgserv* *nrx* negchg_*) ///
                    addtext(MFGSERV-NRXGDP, "`coeff1'", se1, "`se1'", MFGSERV-DEINDU, "`coeff2'", se2, "`se2'") ///
                    se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
                drop nrxmean
            }
        }
    }
}

*** TABLE D5 - ROBUSTNESS WITH FIRE ***
* Same specification as Table D5 with mfgfireshare in place of mfgservshare.
* No regsave here: only the outreg2 table is written.
foreach G in t {
    capture erase "tables/tableD5_robfire.xls"
    capture erase "tables/tableD5_robfire.tex"
    capture erase "tables/tableD5_robfire.txt"
    local lincomset0 "mfgfireshare - nrxmean"
    local lincomset1 "mfgfireshare - negchg_19802000"
    foreach V of varlist ipuI_mfg_`G' ipuI_fire_`G' ipuI_mfgfire_`G' ipuI_trade_`G' ipuI_trade2_`G' ipuI_trade2hh_`G' ipuI_govmt_`G' ipuI_govmt2_`G' ipuI_nrx_`G' ipuI_construct_`G' {
        foreach Y in 2000 {
            foreach X in ag {
                gen nrxmean = nrx`X'_mean`Y'
                * Urbrate ctrl only *
                xi: reg `V' nrxmean mfgfireshare negchg_19802000 urbrate ///
                    larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level ///
                    [w=pop`Y'] if year == `Y', robust
                proglincom "`lincomset0'"
                local coeff1 = r(coefftxt)
                local se1 = r(setxt)
                proglincom "`lincomset1'"
                local coeff2 = r(coefftxt)
                local se2 = r(setxt)
                * Only the two differences computed above are reported. The former
                * diffcoef and diffse rows printed the coeff3 and se3 macros, which are
                * never set in this section and so carried values from an earlier table.
                outreg2 * using "tables/tableD5_robfire.xls", ///
                    keep(mfgfireshare *nrx* negchg_*) ///
                    addtext(MFGFIRE-NRXGDP, "`coeff1'", se1, "`se1'", MFGFIRE-DEINDU, "`coeff2'", se2, "`se2'") ///
                    se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
                drop nrxmean
            }
        }
    }
}
* Stack the per-outcome regsave files from Table D5 into a single dataset *
foreach G in t {
    local t2list ipuI_mfg_`G' ipuI_fire_`G' ipuI_mfgfire_`G' ipuI_trade_`G' ipuI_trade2_`G' ipuI_trade2hh_`G' ipuI_govmt_`G' ipuI_govmt2_`G' ipuI_nrx_`G' ipuI_construct_`G'

    * Step 1: tag each coefficient file with the name of its dependent variable
    foreach V of local t2list {
        use table2_`V', clear
        gen depvar = "`V'"
        save "Intermediary\table3and4\table2_`V'_2", replace 
    }

    * Step 2: start from the first file and append the others in list order
    gettoken t2head t2tail : t2list
    use "Intermediary\table3and4\table2_`t2head'_2", clear 
    foreach V of local t2tail {
        append using "Intermediary\table3and4\table2_`V'_2"
    }
    save "Intermediary\table3and4\table2coefs", replace
}

***** INFORMALITY RESULTS *****

* Reload the year-2000 sectoral sample saved to temp.
use temp, clear
** Coefficients for figure 6 **
* Wage-worker and self-employed shares on nrxmean, mfgservshare and negchg_19802000;
* coefficients are stored with regsave (table3_ files) and the table goes to table_inf.xls.
capture erase "table_inf.xls"
capture erase "table_inf.tex"
capture erase "table_inf.txt"
foreach G in t {
    local lincomset0 "mfgservshare - nrxmean"
    local lincomset1 "mfgservshare - negchg_19802000"
    foreach V of varlist ipuF_wagewker_`G' ipuF_selfwker_`G' {
        foreach Y in 2000 {
            foreach X in ag {
                gen nrxmean = nrx`X'_mean`Y'
                * Urbrate ctrl only *
                xi: reg `V' nrxmean mfgservshare negchg_19802000 urbrate ///
                    larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level ///
                    [w=pop`Y'] if year == `Y', robust
                regsave nrxmean mfgservshare negchg_19802000 using table3_`V', ci level(90) replace pval
                proglincom "`lincomset0'"
                local coeff1 = r(coefftxt)
                local se1 = r(setxt)
                proglincom "`lincomset1'"
                local coeff2 = r(coefftxt)
                local se2 = r(setxt)
                * Only the two differences computed above are reported. The former
                * diffcoef and diffse rows printed the coeff3 and se3 macros, which are
                * never set in this section and so carried values from an earlier table.
                outreg2 * using "table_inf.xls", ///
                    keep(*mfgserv* *nrx* negchg_*) ///
                    addtext(G, `G', MFGSERV-NRXGDP, "`coeff1'", se1, "`se1'", MFGSERV-DEINDU, "`coeff2'", se2, "`se2'") ///
                    se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
                drop nrxmean
            }
        }
    }
}
* Same with MFG-FIRE *
* Informality regressions with mfgfireshare in place of mfgservshare; output goes
* to table_inf_robfire.xls and no coefficient file is saved.
capture erase "table_inf_robfire.xls"
capture erase "table_inf_robfire.tex"
capture erase "table_inf_robfire.txt"
foreach G in t {
    local lincomset0 "mfgfireshare - nrxmean"
    local lincomset1 "mfgfireshare - negchg_19802000"
    foreach V of varlist ipuF_wagewker_`G' ipuF_selfwker_`G' {
        foreach Y in 2000 {
            foreach X in ag {
                gen nrxmean = nrx`X'_mean`Y'
                * Urbrate ctrl only *
                xi: reg `V' nrxmean mfgfireshare negchg_19802000 urbrate ///
                    larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level ///
                    [w=pop`Y'] if year == `Y', robust
                proglincom "`lincomset0'"
                local coeff1 = r(coefftxt)
                local se1 = r(setxt)
                proglincom "`lincomset1'"
                local coeff2 = r(coefftxt)
                local se2 = r(setxt)
                * Only the two differences computed above are reported. The former
                * diffcoef and diffse rows printed the coeff3 and se3 macros, which are
                * never set in this section and so carried values from an earlier table.
                outreg2 * using "table_inf_robfire.xls", ///
                    keep(mfgfireshare *nrx* negchg_*) ///
                    addtext(G, `G', MFGFIRE-NRXGDP, "`coeff1'", se1, "`se1'", MFGFIRE-DEINDU, "`coeff2'", se2, "`se2'") ///
                    se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
                drop nrxmean
            }
        }
    }
}

** Table D6 **
* Worker-type shares (overall, within mfg, within tra) on nrxmean, mfgservshare and
* negchg_19802000. Coefficients are stored with regsave (table3_ files) and the
* table is appended to tables/tableD6.xls.
capture erase "tables/tableD6.xls"
capture erase "tables/tableD6.tex"
capture erase "tables/tableD6.txt"
* Descriptive check of the within-sector wage and self-employment shares
foreach Y in 2000 {
    foreach G in t {
        sum ipuFI_wagewker_mfg_`G'  ipuFI_wagewker_tra_`G'  ipuFI_selfwker_mfg_`G' ipuFI_selfwker_tra_`G'
    }
}
foreach G in t {
    local lincomset0 "mfgservshare - nrxmean"
    local lincomset1 "mfgservshare - negchg_19802000"
    foreach V of varlist ipuF_wagewker_`G' ipuF_selfwker_`G' ipuF_unpothwker_`G' ipuFI_wagewker_mfg_`G' ipuFI_selfwker_mfg_`G' ipuFI_unpothwker_mfg_`G' ipuFI_wagewker_tra_`G'  ipuFI_selfwker_tra_`G' ipuFI_unpothwker_tra_`G' {
        foreach Y in 2000 {
            foreach X in ag {
                gen nrxmean = nrx`X'_mean`Y'
                * Urbrate ctrl only *
                xi: reg `V' nrxmean mfgservshare negchg_19802000 urbrate ///
                    larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level ///
                    [w=pop`Y'] if year == `Y', robust
                regsave nrxmean mfgservshare negchg_19802000 using table3_`V', ci level(90) replace pval
                proglincom "`lincomset0'"
                local coeff1 = r(coefftxt)
                local se1 = r(setxt)
                proglincom "`lincomset1'"
                local coeff2 = r(coefftxt)
                local se2 = r(setxt)
                * Only the two differences computed above are reported. The former
                * diffcoef and diffse rows printed the coeff3 and se3 macros, which are
                * never set in this section and so carried values from an earlier table.
                outreg2 * using "tables/tableD6.xls", ///
                    keep(*mfgserv* *nrx* negchg_*) ///
                    addtext(G, `G', MFGSERV-NRXGDP, "`coeff1'", se1, "`se1'", MFGSERV-DEINDU, "`coeff2'", se2, "`se2'") ///
                    se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
                drop nrxmean
            }
        }
    }
}

* We combine the coefficients *
* Stack the per-outcome regsave files from Table D6 into a single dataset *
foreach G in t {
    local t3list ipuF_wagewker_`G' ipuF_selfwker_`G' ipuF_unpothwker_`G' ipuFI_wagewker_mfg_`G' ipuFI_selfwker_mfg_`G' ipuFI_unpothwker_mfg_`G' ipuFI_wagewker_tra_`G'  ipuFI_selfwker_tra_`G' ipuFI_unpothwker_tra_`G'

    * Step 1: tag each coefficient file with its dependent variable.
    * The list is resolved as a varlist against the data in memory when the loop
    * starts, so the use commands inside the loop do not affect it.
    foreach V of varlist `t3list' {
        use table3_`V', clear
        gen depvar = "`V'"
        save "Intermediary\table3and4\table3_`V'_2", replace 
    }

    * Step 2: start from the first file and append the others in list order
    gettoken t3head t3tail : t3list
    use "Intermediary\table3and4\table3_`t3head'_2", clear 
    foreach V of local t3tail {
        append using "Intermediary\table3and4\table3_`V'_2"
    }
    save "Intermediary\table3and4\table3coefs", replace
}

*** WE NOW CREATE FIGURE 6 ***

* Preparing data * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

* Stack the sectoral (table2) and worker-type (table3) coefficient files
use "intermediary\table3and4\table2coefs.dta", clear
append using "intermediary\table3and4\table3coefs.dta"

* Significance stars for each coefficient, from the stored p-value
gen stars = cond(pval < .01, "***", cond(pval < .05, "**", cond(pval < .10, "*", "")))

* rownum is the position of an outcome within a panel, panelnum the panel itself
gen rownum   = 0
gen panelnum = 1

* Panel 1: sectoral outcomes, numbered in display order
local r = 0
foreach D in ipuI_mfgfire_t ipuI_mfg_t ipuI_fire_t ipuI_trade_t ipuI_trade2_t ipuI_trade2hh_t ipuI_govmt_t ipuI_govmt2_t ipuI_nrx_t ipuI_construct_t {
    local ++r
    replace rownum = `r' if depvar == "`D'"
}

* Everything not numbered so far belongs to panel 2
replace panelnum = 2 if rownum == 0

* Panel 2: worker-type outcomes, numbered in display order
local r = 0
foreach D in ipuF_wagewker_t ipuF_selfwker_t ipuFI_wagewker_mfg_t ipuFI_wagewker_tra_t ipuFI_selfwker_mfg_t ipuFI_selfwker_tra_t {
    local ++r
    replace rownum = `r' if depvar == "`D'"
}
* The unpaid/other worker outcomes are not plotted
foreach D in ipuF_unpothwker_t ipuFI_unpothwker_mfg_t ipuFI_unpothwker_tra_t {
    replace rownum = . if depvar == "`D'"
}
drop if rownum == .

* Display names for the three regressors
replace var = "MFGSERV 2000"        if var == "mfgservshare"
replace var = "NRXGDP 1960-2000"    if var == "nrxmean"
replace var = "DEINDU 1980-2000"    if var == "negchg_19802000"

* Numeric variable that fixes the order of the sub-graphs (1 = MFGSERV, 2 = NRXGDP, 3 = DEINDU)
gen ordvar = cond(var == "NRXGDP 1960-2000", 2, cond(var == "DEINDU 1980-2000", 3, 1))

* Attach the values of var as the value labels of ordvar
labmask ordvar, values(var)

* Creating Figures * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

* PART 1 * * * * *
* Panel A: sectoral outcomes (panelnum 1). The data are restored afterwards.
preserve

keep if panelnum == 1

* invis sits at the upper CI bound and only carries the hand-entered "diff" label
gen invis = ci_upper
gen invislab = ""

* Labels for the NRXGDP sub-graph (ordvar 2), rows 1 to 10
local r = 0
foreach L in "diff -0.27**" "diff -0.20**" "diff -0.07*" "diff 0.29*" "diff 0.45***" "diff 0.44**" "diff -0.11**" "diff -0.08" "diff 0.06" "diff -0.15**" {
    local ++r
    replace invislab = "`L'" if ordvar == 2 & rownum == `r'
}

* Labels for the DEINDU sub-graph (ordvar 3), rows 1 to 10
local r = 0
foreach L in "diff -0.92**" "diff -0.91**" "diff -0.01" "diff 0.58^" "diff 1.17**" "diff 1.38**" "diff -0.22^" "diff -0.18" "diff 0.15" "diff -0.37*" {
    local ++r
    replace invislab = "`L'" if ordvar == 3 & rownum == `r'
}

* Layer 1: confidence intervals; layer 2: coefficients with stars; layer 3: diff labels
twoway ///
    (rcap ci_lower ci_upper rownum, lcolor(black) horizontal ///
        by(ordvar, row(1) xrescale legend(off) note("") colfirst)) ///
    (scatter rownum coef, by(ordvar) mcolor(blue) msymbol(D) msize(medium) ///
        mlabel(stars) mlabcolor(blue) mlabposition(6) mlabgap(*.5) mlabsize(medium)) ///
    (scatter rownum invis, by(ordvar) msymbol(none) ///
        mlabel(invislab) mlabcolor(black) mlabposition(3) mlabgap(*2) mlabsize(small)) ///
    , ///
    ylabel(1  "1.MFG+FIRE" ///
           2  "2.MFG" ///
           3  "3.FIRE" ///
           4  "4.UNT1" ///
           5  "5.UNT2" ///
           6  "6.UNT3" ///
           7  "7.GOVT" ///
           8  "8.GOVT2" ///
           9  "9.NRX" ///
           10 "10.CONST" ///
           , angle(0) nogrid labgap(3)) ///
    xline(0, lpattern(longdash) lwidth(medium) lcolor(red)) ///
    yline(3.5, lpattern(shortdash) lcolor(gs5)) ///
    yline(6.5, lpattern(shortdash) lcolor(gs5)) ///
    subtitle(,bcolor(white) lcol(black)) ///
    plotregion(margin(b=+2)) ///
    plotregion(margin(r=+5)) ///
    ytitle("") ///
    yscale(reverse)

* The same graph is exported under two file names
graph export "Figures/table3and4part1new.png", replace width(3000) height(1908)
graph export "Figures/Figure6A.png", replace width(3000) height(1908)

restore

* PART 2 * * * * *
* Panel B: wage-worker and self-employed outcomes (panelnum 2, rows 1 and 2).
preserve

keep if panelnum == 2
keep if rownum == 1 | rownum == 2

* invis sits at the upper CI bound and only carries the hand-entered "diff" label
gen invis = ci_upper
gen invislab = ""

* Labels for the NRXGDP sub-graph (ordvar 2)
local r = 0
foreach L in "diff -0.48*" "diff 0.40^" {
    local ++r
    replace invislab = "`L'" if ordvar == 2 & rownum == `r'
}

* Labels for the DEINDU sub-graph (ordvar 3)
local r = 0
foreach L in "diff -0.88" "diff 0.80" {
    local ++r
    replace invislab = "`L'" if ordvar == 3 & rownum == `r'
}

* Layer 1: confidence intervals; layer 2: coefficients with stars; layer 3: diff labels
twoway ///
    (rcap ci_lower ci_upper rownum, lcolor(black) horizontal ///
        by(ordvar, xrescale row(1) legend(off) note("") colfirst )) ///
    (scatter rownum coef, by(ordvar) mcolor(blue) msymbol(D) msize(medium) ///
        mlabel(stars) mlabcolor(blue) mlabposition(6) mlabgap(*.5) mlabsize(medium)) ///
    (scatter rownum invis, by(ordvar) msymbol(none) ///
        mlabel(invislab) mlabcolor(black) mlabposition(3) mlabgap(*2) mlabsize(small)) ///
    , ///
    ylabel(1 "11.WAGE" ///
           2 "12.SELF" ///
           , angle(0) nogrid) ///
    xline(0, lpattern(longdash) lwidth(medthin) lcolor(red)) ///
    subtitle(,bcolor(white) lcol(black)) ///
    plotregion(margin(b=+2)) ///
    plotregion(margin(r=+5)) ///
    ytitle("") ///
    aspect(0.3) ///
    yscale(reverse)

* The same graph is exported under two file names
graph export "Figures/table3and4part2new.png", replace  width(3000) height(1908)
graph export "Figures/Figure6B.png", replace width(3000) height(1908)

restore

*****************
*****************
**# FIGURE 10 #1
*****************
*****************

***** HUMAN CAPITAL *****

* Source: I2D2 (see I2D2 folder)
* Mean years of education by country over three year windows. Each window is
* saved as educ_i2d2_<suffix> with the suffix appended to every educy variable:
*   9505 = 1995-2005, 9010 = 1990-2010, 9017 = 1990 onward (no upper bound applied)
local keep9505 "year >= 1995 & year <= 2005"
local keep9010 "year >= 1990 & year <= 2010"
local keep9017 "year >= 1990"
foreach W in 9505 9010 9017 {
    use "dist_educy_254565_all.dta", clear
    keep if `keep`W''
    count
    * Harmonise country codes
    replace ccode = "XKO" if ccode == "KSV"
    replace ccode = "ROU" if ccode == "ROM"
    replace ccode = "COD" if ccode == "ZAR"
    replace ccode = "TLS" if ccode == "TMP"
    replace ccode = "PSE" if ccode == "WBG"
    * One row per country: mean of every educy variable over the window
    collapse (mean) educy*, by(ccode)
    foreach X of varlist educy* {
        ren `X' `X'_`W'
    }
    sort ccode
    save educ_i2d2_`W', replace
}

* Human capital index 2020 from the World Bank *
* Url: https://datacatalog.worldbank.org/search/dataset/0038030/Human-Capital-Index
* Last accessed: 03-29-2022
* Keeps the schooling, test-score and index columns, keyed by country_wb.
clear
import excel "hci20_simpleformat.xlsx", sheet("Sheet1") firstrow clear
rename countryname country_wb
keep country_wb expyrs testscore learning_yrss* hci20
sort country_wb
save hci20, replace

* Data set created from IPUMS
* See the "IPUMS and other files" folder 
* Builds education attainment shares per country from the census closest to 2000.
* NOTE: later code selects these variables by position ranges (sh_primplus-sh_tertplus
* and the _noukn equivalents), so the creation order below must not change.
use "dataset_education_a65_ipums_19032022", clear
keep if type_census == "census"
codebook country 
* 65 in 189
sum year
* 1960-2016
keep country year share_*

* We study the unknown 
sum share_unknown, d
* Check: total of all share variables per row
egen test =rsum(share_*)
sum test, d
drop test
* ok

** Not removing the unknowns 
rename share_unknown sh_unkn
rename share_less sh_none
rename share_prim sh_prim
rename share_sec sh_sec
rename share_univ sh_tert
* Cumulative shares: at least primary / secondary / tertiary
gen sh_primplus = sh_prim+sh_sec+sh_tert
gen sh_secplus = sh_sec+sh_tert
gen sh_tertplus = sh_tert

** Removing the unknowns **
* Rescale the four known categories so that they add up to 100
egen test =rsum(sh_none sh_prim sh_sec sh_tert)
foreach X in sh_none sh_prim sh_sec sh_tert {
    gen `X'_noukn = `X'/test*100
}
drop test
* Check: total of the rescaled shares
egen test =rsum(*_noukn)
sum test, d
drop test
gen sh_primplus_noukn = sh_prim_noukn+sh_sec_noukn+sh_tert_noukn
gen sh_secplus_noukn = sh_sec_noukn+sh_tert_noukn
gen sh_tertplus_noukn = sh_tert_noukn

* We now keep the closest to 2000
* (within 1990-2020; ties in distance go to the later year)
destring year, replace 
keep if year >= 1990 & year <= 2020
gen dist2000 = abs(year-2000)
gsort +country +dist2000 -year
order country dist2000 year 
bysort country: keep if _n == 1
tab year
* 64

* Align country names with country_gjv
rename country country_gjv
replace country_gjv = "Bolivia (Plurinational State of)" if country_gjv == "Bolivia"
replace country_gjv = "Egypt" if country_gjv == "Egypt, Arab Rep."
replace country_gjv = "Iran (Islamic Republic of)" if country_gjv == "Iran, Islamic Rep."
replace country_gjv = "Lao People's Democratic Republic" if country_gjv == "Lao PDR"
replace country_gjv = "United Republic of Tanzania" if country_gjv == "Tanzania"
replace country_gjv = "Venezuela (Bolivarian Republic of)" if country_gjv == "Venezuela, RB"
replace country_gjv = "Viet Nam" if country_gjv == "Vietnam"
sort country_gjv
save humankcountry, replace
* Sort issued after the save, so it does not affect the saved file
gsort- sh_secplus

* Years schooling in Ipums
* Step 1: lookup of census identifier to census type, saved to temp
use "dataset_education_a65_ipums_19032022", clear
keep census country year type_census
keep census type_census
sort census
save temp, replace

* Step 2: attach the census type to the years-of-schooling file
* Created in the folder IPUMS *
use "yrschool.dta", clear
sort census
merge census using temp
tab _merge
tab census if _merge == 1
drop if _merge == 2
drop _merge
tab type_census, m
keep if type_census == "census"

* We now keep the closest to 2000
* (within 1990-2020; ties in distance go to the later year)
destring year, replace 
keep if year >= 1990 & year <= 2020
gen dist2000 = abs(year-2000)
gsort +country +dist2000 -year
order country dist2000 year 
bysort country: keep if _n == 1
tab year
* 55

rename a65 yrs65
rename a45 yrs45

* Align country names with country_gjv
rename country country_gjv
replace country_gjv = "Bolivia (Plurinational State of)" if country_gjv == "Bolivia"
replace country_gjv = "Egypt" if country_gjv == "Egypt, Arab Rep."
replace country_gjv = "Iran (Islamic Republic of)" if country_gjv == "Iran, Islamic Rep."
replace country_gjv = "Lao People's Democratic Republic" if country_gjv == "Lao PDR"
replace country_gjv = "United Republic of Tanzania" if country_gjv == "Tanzania"
replace country_gjv = "Venezuela (Bolivarian Republic of)" if country_gjv == "Venezuela, RB"
replace country_gjv = "Viet Nam" if country_gjv == "Vietnam"
replace country_gjv = "Lao People's Democratic Republic" if country_gjv == "Laos"
*replace country_gjv = "" if country_gjv == "Palestine"
replace country_gjv = "Papua New Guinea" if country_gjv == "Papua New"
replace country_gjv = "Venezuela (Bolivarian Republic of)" if country_gjv == "Venezuela"
sort country_gjv
save yrsschool2, replace

* Rebuild the country-level constants used by the human capital regressions.
use LACdata, clear

** MFGSERV1960: 1960 manufacturing+services share on every row of the country **
foreach X in 1960 {
    gen mfgservshare_`X' = mfgservshare if year == `X'
    bysort ccode: egen mfgservshare`X' = max(mfgservshare_`X')
    drop mfgservshare_`X'
}

** POP: population and urban population in benchmark years **
foreach X in 1960 2000 2010 2020 {
    gen pop_`X' = pop if year == `X'
    bysort ccode: egen pop`X' = max(pop_`X')
    drop pop_`X'
}
foreach X in 2020 {
    gen upop_`X' = upop if year == `X'
    bysort ccode: egen upop`X' = max(upop_`X')
    drop upop_`X'
}
* pop1 is a constant; pop201020 takes the 2010 value on 2010 rows and the 2020 value on 2020 rows
gen pop1 = 1
gen pop201020 = pop2010 if year == 2010
replace pop201020 = pop2020 if year == 2020
sum pop201020 pop1960 pop1

** LPCGDP in benchmark years **
foreach X in 1960 2000 2010 2020 {
    gen lpcgdp_`X' = lpcgdp if year == `X'
    bysort ccode: egen lpcgdp`X' = max(lpcgdp_`X')
    drop lpcgdp_`X'
}

** DEINDUSTRIALIZATION **
* Manufacturing share in 1980, 2000 and 2020 (grouped by country_gjv)
foreach X in 1980 2000 2020 {
    gen indu_`X' = mfgshare if year == `X'
    bysort country_gjv: egen indu`X' = max(indu_`X')
    drop indu_`X'
}
* negchg_1980YYYY: size of the decline since 1980, 0 where there is no decline
foreach X in 2000 2020 {
    gen chg_1980`X' = indu`X' - indu1980
    gen neg = (chg_1980`X' < 0)
    gen negchg_1980`X' = 0
    replace negchg_1980`X' = chg_1980`X' if neg == 1
    replace negchg_1980`X' = -negchg_1980`X'
    sum negchg_1980`X', d
    drop neg
}
* We add the data 
* IPUMS attainment shares, keyed by country_gjv
sort country_gjv 
merge country_gjv using humankcountry
tab _merge
drop _merge

* IPUMS years of schooling, keyed by country_gjv
sort country_gjv 
merge country_gjv using yrsschool2
tab _merge
tab country_gjv if _merge == 2
tab country_gjv if _merge == 1
drop _merge

* World Bank human capital index, keyed by country_wb
sort country_wb
merge country_wb using hci20
tab _merge
tab country_wb if _merge == 1
* ok, I checked, they are really missing
*tab country_wb if _merge == 2
drop if _merge == 2
drop _merge
* Learning-Adjusted Years of School are calculated by multiplying the estimates of
* expected years of school by the ratio of most recent harmonized test scores to 625,
* where 625 corresponds to advanced attainment on the TIMSS (Trends in International
* Mathematics and Science Study) test, based on methodology in Filmer et al. (2018).
sum testscore, d
* 305 to 575

* Returns to education, keyed by ccode
* We create this file in I2D2 folder
sort ccode
merge ccode using "ret_uret"
tab _merge
tab ccode if _merge == 2
drop _merge

* I2D2 mean years of education for the three windows
foreach X in 9505 9010 9017 {
    sort ccode
    merge ccode using educ_i2d2_`X'
    tab _merge
    tab ccode if _merge == 1
    tab ccode if _merge == 2
    drop if _merge == 2
    drop _merge
}

* Test-score adjusted versions: variable times testscore/625
foreach V of varlist sh_primplus-sh_tertplus sh_primplus_noukn-sh_tertplus_noukn educy_25* yrs65 yrs45 {
    gen `V'_hci = `V'*testscore/625
}

* yrs of educ * ret 
* The return variables are first multiplied by 100
foreach X of varlist retedu* {
    replace `X' = `X'*100
}
foreach L in cntry reg01 reg02 reg03 psu {
    foreach X of varlist educy_2565_9505 educy_2565_9010 educy_2565_9017 yrs65 yrs45 {
        gen r_`X'_`L' = `X'*retedu_uret_`L'
    }
}

* We create MFG+FIRE
gen mfgfireshare = mfgshare+fire2020_ma5_un if mfgshare != . & fire2020_ma5_un != . 

***** TABLE D15 PANEL A *****

* Human capital outcomes on mfgservshare, nrxmean and negchg_19802000 (year 2000,
* weighted by pop2000). Coefficients are stored with regsave (table9_ files) and
* the table is appended to tables\tableD15A.xls.
capture erase "tables\tableD15A.xls"
capture erase "tables\tableD15A.tex"
capture erase "tables\tableD15A.txt"
local lincomset0 "nrxmean - mfgservshare"
local lincomset1 "negchg_19802000 - mfgservshare"
foreach V of varlist yrs65 testscore yrs65_hci retedu_uret_cntry r_yrs65_cntry {
    foreach Y in 2000 {
        foreach X in ag {
            gen nrxmean = nrx`X'_mean`Y'
            * Urbrate ctrl only *
            xi: reg `V' mfgservshare nrxmean negchg_19802000 urbrate ///
                larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level ///
                [w=pop`Y'] if year == `Y', robust
            regsave mfgservshare nrxmean negchg_19802000 using table9_`V', replace ci level(90) pval
            proglincom "`lincomset0'"
            local coeff1 = r(coefftxt)
            local se1 = r(setxt)
            proglincom "`lincomset1'"
            local coeff2 = r(coefftxt)
            local se2 = r(setxt)
            * Only the two differences computed above are reported. The former
            * diffcoef and diffse rows printed the coeff3 and se3 macros, which are
            * never set in this section and so carried values from an earlier table.
            outreg2 * using "tables\tableD15A.xls", ///
                keep(*mfgserv* *nrx* negchg_*) ///
                addtext(NRXGDP-MFGSERV, "`coeff1'", se1, "`se1'", DEINDU-MFGSERV, "`coeff2'", se2, "`se2'") ///
                se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
            drop nrxmean
        }
    }
}
** ROBUSTNESS-FIRE **
* Panel A specification with mfgfireshare in place of mfgservshare; only the
* outreg2 table is written (no regsave).
capture erase "tables\tableD15A_robfire.xls"
capture erase "tables\tableD15A_robfire.tex"
capture erase "tables\tableD15A_robfire.txt"
local lincomset0 "nrxmean - mfgfireshare"
local lincomset1 "negchg_19802000 - mfgfireshare"
foreach V of varlist yrs65 testscore yrs65_hci retedu_uret_cntry r_yrs65_cntry {
    foreach Y in 2000 {
        foreach X in ag {
            gen nrxmean = nrx`X'_mean`Y'
            * Urbrate ctrl only *
            xi: reg `V' mfgfireshare nrxmean negchg_19802000 urbrate ///
                larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level ///
                [w=pop`Y'] if year == `Y', robust
            proglincom "`lincomset0'"
            local coeff1 = r(coefftxt)
            local se1 = r(setxt)
            proglincom "`lincomset1'"
            local coeff2 = r(coefftxt)
            local se2 = r(setxt)
            * Row labels now name MFGFIRE, matching the contrasts actually tested
            * (they previously said MFGSERV). The diffcoef and diffse rows are dropped
            * because they printed the coeff3 and se3 macros, which are never set in
            * this section and so carried values from an earlier table.
            outreg2 * using "tables\tableD15A_robfire.xls", ///
                keep(mfgfireshare *nrx* negchg_*) ///
                addtext(NRXGDP-MFGFIRE, "`coeff1'", se1, "`se1'", DEINDU-MFGFIRE, "`coeff2'", se2, "`se2'") ///
                se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
            drop nrxmean
        }
    }
}

* Keep the merged country data for the Panel B regressions
save temp, replace

* We combine the coefficients *
local t9list yrs65 testscore yrs65_hci retedu_uret_cntry r_yrs65_cntry

* Step 1: tag each Panel A coefficient file with its dependent variable
foreach V of local t9list {
    use table9_`V', clear
    gen depvar = "`V'"
    save table9_`V'_2, replace 
}

* Step 2: start from the first file and append the others in list order
gettoken t9head t9tail : t9list
use table9_`t9head'_2, clear
foreach V of local t9tail {
    append using table9_`V'_2
}
save "intermediary\table9\table9coefs_panelA", replace

***** TABLE D15 PANEL B *****
* For each Panel B outcome: regress on mfgservshare, nrxmean and
* negchg_19802000 plus controls (year 2000, weighted by pop2000), store the
* three coefficients with regsave, and append one column to tables\tableD15B.xls.

use temp, clear
* Old output is removed first because outreg2 is called with -append- below.
capture erase "tables\tableD15B.xls"
capture erase "tables\tableD15B.tex"
capture erase "tables\tableD15B.txt"
* Linear combinations handed to proglincom after each regression.
local lincomset0 "nrxmean - mfgservshare"
local lincomset1 "negchg_19802000 - mfgservshare"
* FIX: addtext() below references coeff3/se3, which this section never assigns.
* Blank them explicitly so the diffcoef/diffse rows cannot print values left
* over from an earlier part of the do-file.
local coeff3 ""
local se3 ""
foreach V of varlist retedu_uret_reg01 retedu_uret_reg02 retedu_uret_psu r_yrs65_reg01 r_yrs65_reg02 {
    foreach Y in 2000 {
        foreach X in ag {
            * Generic regressor name, so the lincom strings, regsave list and
            * keep() patterns do not depend on the sector/year suffix.
            gen nrxmean = nrx`X'_mean`Y'
            * Urbrate ctrl only *
            xi: reg `V' mfgservshare nrxmean negchg_19802000 urbrate larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop`Y'] if year == `Y', robust
            * Coefficients, p-values and 90% CIs; read back by the Figure 10 code.
            regsave mfgservshare nrxmean negchg_19802000 using table9_`V', replace ci level(90) pval
            * proglincom returns r(coefftxt) (estimate + stars) and r(setxt) ("[se]").
            proglincom "`lincomset0'"
            local coeff1 = r(coefftxt)
            local se1 = r(setxt)
            proglincom "`lincomset1'"
            local coeff2 = r(coefftxt)
            local se2 = r(setxt)
            outreg2 * using "tables\tableD15B.xls", keep(*mfgserv* *nrx* negchg_*) addtext(NRXGDP-MFGSERV, "`coeff1'", se1, "`se1'", DEINDU-MFGSERV, "`coeff2'", se2, "`se2'", diffcoef, "`coeff3'", diffse, "`se3'") se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
            drop nrxmean
        }
    }
}
** ROBUSTNESS - FIRE **
* Panel B robustness check: same specification as the main Panel B regression,
* with mfgfireshare replacing mfgservshare. One column per outcome is appended
* to tables\tableD15B_robfire.xls.
* Old output is removed first because outreg2 is called with -append- below.
capture erase "tables\tableD15B_robfire.xls"
capture erase "tables\tableD15B_robfire.tex"
capture erase "tables\tableD15B_robfire.txt"
* Linear combinations handed to proglincom after each regression.
local lincomset0 "nrxmean - mfgfireshare"
local lincomset1 "negchg_19802000 - mfgfireshare"
* FIX: addtext() below references coeff3/se3, which this section never assigns.
* Blank them explicitly so the diffcoef/diffse rows cannot print values left
* over from an earlier part of the do-file.
local coeff3 ""
local se3 ""
foreach V of varlist retedu_uret_reg01 retedu_uret_reg02 retedu_uret_psu r_yrs65_reg01 r_yrs65_reg02 {
    foreach Y in 2000 {
        foreach X in ag {
            * Generic regressor name, so the lincom strings and keep() patterns
            * do not depend on the sector/year suffix.
            gen nrxmean = nrx`X'_mean`Y'
            * Urbrate ctrl only *
            xi: reg `V' mfgfireshare nrxmean negchg_19802000 urbrate larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop`Y'] if year == `Y', robust
            * proglincom returns r(coefftxt) (estimate + stars) and r(setxt) ("[se]").
            proglincom "`lincomset0'"
            local coeff1 = r(coefftxt)
            local se1 = r(setxt)
            proglincom "`lincomset1'"
            local coeff2 = r(coefftxt)
            local se2 = r(setxt)
            * FIX: the difference rows are labelled ...-MFGFIRE, matching the
            * linear combinations actually tested above (they were labelled
            * ...-MFGSERV, copied from the main panel).
            outreg2 * using "tables\tableD15B_robfire.xls", keep(mfgfireshare *nrx* negchg_*) addtext(NRXGDP-MFGFIRE, "`coeff1'", se1, "`se1'", DEINDU-MFGFIRE, "`coeff2'", se2, "`se2'", diffcoef, "`coeff3'", diffse, "`se3'") se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
            drop nrxmean
        }
    }
}

***** TABLE D15 PANEL C *****
* For each Panel C outcome: regress on mfgservshare, nrxmean and
* negchg_19802000 plus controls (year 2000, weighted by pop2000), store the
* three coefficients with regsave, and append one column to tables\tableD15C.xls.

use temp, clear
* Old output is removed first because outreg2 is called with -append- below.
capture erase "tables\tableD15C.xls"
capture erase "tables\tableD15C.tex"
capture erase "tables\tableD15C.txt"
* Linear combinations handed to proglincom after each regression.
local lincomset0 "nrxmean - mfgservshare"
local lincomset1 "negchg_19802000 - mfgservshare"
* FIX: addtext() below references coeff3/se3, which this section never assigns.
* Blank them explicitly so the diffcoef/diffse rows cannot print values left
* over from an earlier part of the do-file.
local coeff3 ""
local se3 ""
foreach V of varlist r_yrs65_psu retexp_uret_cntry retexp_uret_reg01 retexp_uret_reg02 retexp_uret_psu {
    foreach Y in 2000 {
        foreach X in ag {
            * Generic regressor name, so the lincom strings, regsave list and
            * keep() patterns do not depend on the sector/year suffix.
            gen nrxmean = nrx`X'_mean`Y'
            * Urbrate ctrl only *
            xi: reg `V' mfgservshare nrxmean negchg_19802000 urbrate larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop`Y'] if year == `Y', robust
            * Coefficients, p-values and 90% CIs; read back by the Figure 10 code.
            regsave mfgservshare nrxmean negchg_19802000 using table9_`V', replace ci level(90) pval
            * proglincom returns r(coefftxt) (estimate + stars) and r(setxt) ("[se]").
            proglincom "`lincomset0'"
            local coeff1 = r(coefftxt)
            local se1 = r(setxt)
            proglincom "`lincomset1'"
            local coeff2 = r(coefftxt)
            local se2 = r(setxt)
            outreg2 * using "tables\tableD15C.xls", keep(*mfgserv* *nrx* negchg_*) addtext(NRXGDP-MFGSERV, "`coeff1'", se1, "`se1'", DEINDU-MFGSERV, "`coeff2'", se2, "`se2'", diffcoef, "`coeff3'", diffse, "`se3'") se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
            drop nrxmean
        }
    }
}
** ROBUSTNESS - FIRE **
* Panel C robustness check: same specification as the main Panel C regression,
* with mfgfireshare replacing mfgservshare. One column per outcome is appended
* to tables\tableD15C_robfire.xls.
* Old output is removed first because outreg2 is called with -append- below.
capture erase "tables\tableD15C_robfire.xls"
capture erase "tables\tableD15C_robfire.tex"
capture erase "tables\tableD15C_robfire.txt"
* Linear combinations handed to proglincom after each regression.
local lincomset0 "nrxmean - mfgfireshare"
local lincomset1 "negchg_19802000 - mfgfireshare"
* FIX: addtext() below references coeff3/se3, which this section never assigns.
* Blank them explicitly so the diffcoef/diffse rows cannot print values left
* over from an earlier part of the do-file.
local coeff3 ""
local se3 ""
foreach V of varlist r_yrs65_psu retexp_uret_cntry retexp_uret_reg01 retexp_uret_reg02 retexp_uret_psu {
    foreach Y in 2000 {
        foreach X in ag {
            * Generic regressor name, so the lincom strings and keep() patterns
            * do not depend on the sector/year suffix.
            gen nrxmean = nrx`X'_mean`Y'
            * Urbrate ctrl only *
            xi: reg `V' mfgfireshare nrxmean negchg_19802000 urbrate larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop`Y'] if year == `Y', robust
            * proglincom returns r(coefftxt) (estimate + stars) and r(setxt) ("[se]").
            proglincom "`lincomset0'"
            local coeff1 = r(coefftxt)
            local se1 = r(setxt)
            proglincom "`lincomset1'"
            local coeff2 = r(coefftxt)
            local se2 = r(setxt)
            * FIX: the difference rows are labelled ...-MFGFIRE, matching the
            * linear combinations actually tested above (they were labelled
            * ...-MFGSERV, copied from the main panel).
            outreg2 * using "tables\tableD15C_robfire.xls", keep(mfgfireshare *nrx* negchg_*) addtext(NRXGDP-MFGFIRE, "`coeff1'", se1, "`se1'", DEINDU-MFGFIRE, "`coeff2'", se2, "`se2'", diffcoef, "`coeff3'", diffse, "`se3'") se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
            drop nrxmean
        }
    }
}

* We combine the coefficients *
* Each table9_<outcome> file (written by regsave) gets a depvar tag, and the
* tagged copies are stacked into one coefficient file for Panels B and C.
* The list also contains retedu_uret_cntry and r_yrs65_cntry, whose table9_
* files are written by the Panel A regressions.
local panelsBC_first "retexp_uret_cntry"
local panelsBC_rest "retexp_uret_reg01 retexp_uret_reg02 retexp_uret_psu retedu_uret_cntry retedu_uret_reg01 retedu_uret_reg02 retedu_uret_psu r_yrs65_cntry r_yrs65_reg01 r_yrs65_reg02 r_yrs65_psu"
foreach outcome in `panelsBC_first' `panelsBC_rest' {
    use table9_`outcome', clear
    gen depvar = "`outcome'"
    save table9_`outcome'_2, replace
}
use table9_`panelsBC_first'_2, clear
foreach outcome of local panelsBC_rest {
    append using table9_`outcome'_2
}
save "intermediary\table9\table9coefs_panelsBC", replace


***** FIGURE 10 *****
* Builds the plotting data set for Figure 10 from the stored coefficients:
* one row per (outcome, regressor) with coef, CI bounds, stars, the row
* position inside a sub-figure (rownum) and the sub-figure it belongs to.

use "intermediary\table9\table9coefs_panelA", clear
append using "intermediary\table9\table9coefs_panelsBC"

* FIX: retedu_uret_cntry and r_yrs65_cntry are stacked into BOTH coefficient
* files (each from the same table9_<outcome> file), so after the append their
* rows appear twice and would be drawn twice. Keep one copy of each row.
duplicates drop

* Assigning stars to each coefficient
gen stars = ""
replace stars = "*"   if pval < .10
replace stars = "**"  if pval < .05
replace stars = "***" if pval < .01

* Determine row number in plot and panels

* rownum determines the position of a coefficient within a plot.
* Outcomes left at 0 (testscore and the *_reg01 / *_psu variants) are not
* plotted and are dropped below.
gen rownum = 0
replace rownum = 1 if inlist(depvar, "yrs65", "retedu_uret_cntry", "r_yrs65_cntry", "retexp_uret_cntry")
replace rownum = 2 if inlist(depvar, "yrs65_hci", "retedu_uret_reg02", "r_yrs65_reg02", "retexp_uret_reg02")

* subfigure determines the position of coefficients across plots
gen subfigure = 0
replace subfigure = 1 if inlist(depvar, "yrs65", "yrs65_hci")
replace subfigure = 2 if inlist(depvar, "retedu_uret_cntry", "retedu_uret_reg01", "retedu_uret_reg02", "retedu_uret_psu")
replace subfigure = 3 if inlist(depvar, "r_yrs65_cntry", "r_yrs65_reg01", "r_yrs65_reg02", "r_yrs65_psu")
replace subfigure = 4 if inlist(depvar, "retexp_uret_cntry", "retexp_uret_reg01", "retexp_uret_reg02", "retexp_uret_psu")

drop if rownum == 0

* Rename the regressors to the labels shown as panel titles
replace var = "MFGSERV 2000"     if var == "mfgservshare"
replace var = "NRXGDP 1960-2000" if var == "nrxmean"
replace var = "DEINDU 1980-2000" if var == "negchg_19802000"

* Generate a new variable to order the sub-graphs as desired
* (1 = MFGSERV, 2 = NRXGDP, 3 = DEINDU)
gen ordvar = 1
replace ordvar = 2 if var == "NRXGDP 1960-2000"
replace ordvar = 3 if var == "DEINDU 1980-2000"

* Label the values of ordvar with the text held in 'var'
labmask ordvar, values(var)

*****
* Figure 10a (subfigure 1): one panel per regressor (ordvar), one row per
* outcome (rownum). Drawn on a temporary subset of the data (preserve/restore).

preserve

keep if subfigure == 1

* invis: x-position of an invisible marker at the upper CI bound; it only
* carries the text in invislab, printed to the right of the interval.
gen invis = ci_upper
gen invislab = ""

* "diff" labels are typed in by hand, not computed here.
* NOTE(review): presumably copied from the lincom rows of the tables - confirm.
* NRXGDP panel
replace invislab = "diff 0.03" if ordvar == 2 & rownum == 1
replace invislab = "diff 0.01" if ordvar == 2 & rownum == 2
* DEINDU panel
replace invislab = "diff 0.11" if ordvar == 3 & rownum == 1
replace invislab = "diff 0.11" if ordvar == 3 & rownum == 2

twoway (rcap ci_lower ci_upper rownum, lcolor(black) horizontal                /// confidence-interval whiskers
            by(ordvar, xrescale row(1) legend(off) note("") colfirst ))        /// one panel per regressor
       (scatter rownum coef, by(ordvar)                                         /// point estimates
            mcolor(blue) msymbol(D) msize(medium)                               ///
            mlabel(stars) mlabcolor(blue) mlabposition(6)                       /// stars below the marker
            mlabgap(*.5) mlabsize(medium))                                      ///
       (scatter rownum invis, by(ordvar) msymbol(none)                          /// invisible marker at the upper bound
            mlabel(invislab) mlabcolor(black) mlabposition(3)                   /// "diff" text to its right
            mlabgap(*2) mlabsize(small))                                        ///
       ,                                                                        ///
       ylabel(1 "1.Avg # Yrs Educ"                                              /// row labels (y axis)
              2 "2.Yrs Educ x Test"                                             ///
              , angle(0) nogrid)                                                ///
       xline(0, lpattern(longdash) lwidth(medthin) lcolor(red))                 /// reference line at zero
       subtitle(,bcolor(white) lcol(black))                                     /// panel title boxes
       plotregion(margin(b=+2))                                                 /// extra bottom margin
       plotregion(margin(r=+5))                                                 /// extra right margin
       ytitle("")                                                               /// no y-axis title
       aspect(0.3)                                                              /// aspect ratio
       yscale(reverse)

* The same graph is exported under two file names.
graph export "Figures/tablehumank_part1.png", replace  width(3000) height(1908)
graph export "Figures/figure10a.png", replace  width(3000) height(1908)

restore

*****
* Figure 10b (subfigure 2): one panel per regressor (ordvar), one row per
* outcome (rownum). Drawn on a temporary subset of the data (preserve/restore).

preserve

keep if subfigure == 2

* invis: x-position of an invisible marker at the upper CI bound; it only
* carries the text in invislab, printed to the right of the interval.
gen invis = ci_upper
gen invislab = ""

* "diff" labels are typed in by hand, not computed here.
* NOTE(review): presumably copied from the lincom rows of the tables - confirm.
* The commented-out lines are labels for rows that are not plotted.
* NRXGDP panel
replace invislab = "diff -0.05^" if ordvar == 2 & rownum == 1
*replace invislab = "diff -0.14*"	if ordvar == 2 & rownum == 
replace invislab = "diff -0.04^" if ordvar == 2 & rownum == 2
*replace invislab = "diff 0.03"		if ordvar == 2 & rownum == 
* DEINDU panel
replace invislab = "diff -0.18" if ordvar == 3 & rownum == 1
*replace invislab = "diff -0.09"	if ordvar == 3 & rownum == 
replace invislab = "diff -0.08" if ordvar == 3 & rownum == 2
*replace invislab = "diff -0.18"	if ordvar == 3 & rownum == 

twoway (rcap ci_lower ci_upper rownum, lcolor(black) horizontal                /// confidence-interval whiskers
            by(ordvar, xrescale row(1) legend(off) note("") colfirst ))        /// one panel per regressor
       (scatter rownum coef, by(ordvar)                                         /// point estimates
            mcolor(blue) msymbol(D) msize(medium)                               ///
            mlabel(stars) mlabcolor(blue) mlabposition(6)                       /// stars below the marker
            mlabgap(*.5) mlabsize(medium))                                      ///
       (scatter rownum invis, by(ordvar) msymbol(none)                          /// invisible marker at the upper bound
            mlabel(invislab) mlabcolor(black) mlabposition(3)                   /// "diff" text to its right
            mlabgap(*2) mlabsize(small))                                        ///
       ,                                                                        ///
       ylabel(1 "3.Return Educ"                                                 /// row labels (y axis)
              2 "4.Return Educ REG"                                             ///
              , angle(0) nogrid)                                                ///
       xline(0, lpattern(longdash) lwidth(medthin) lcolor(red))                 /// reference line at zero
       subtitle(,bcolor(white) lcol(black))                                     /// panel title boxes
       plotregion(margin(b=+2))                                                 /// extra bottom margin
       plotregion(margin(r=+5))                                                 /// extra right margin
       xlabel(-0.2 (0.2) 0.4)                                                   /// x-axis ticks set by hand
       ytitle("")                                                               /// no y-axis title
       aspect(0.3)                                                              /// aspect ratio
       yscale(reverse)

* The same graph is exported under two file names.
graph export "Figures/tablehumank_part2.png", replace  width(3000) height(1908)
graph export "Figures/figure10b.png", replace  width(3000) height(1908)

restore

*****
* Figure 10c (subfigure 3): one panel per regressor (ordvar), one row per
* outcome (rownum). Drawn on a temporary subset of the data (preserve/restore).

preserve

keep if subfigure == 3

* invis: x-position of an invisible marker at the upper CI bound; it only
* carries the text in invislab, printed to the right of the interval.
gen invis = ci_upper
gen invislab = ""

* "diff" labels are typed in by hand, not computed here.
* NOTE(review): presumably copied from the lincom rows of the tables - confirm.
* The commented-out lines are earlier labels that are no longer applied.
* NRXGDP panel
replace invislab = "diff -0.05" if ordvar == 2 & rownum == 1
*replace invislab = "diff -1.35"	if ordvar == 2 & rownum == 2
replace invislab = "diff 0.02" if ordvar == 2 & rownum == 2
*replace invislab = "diff 0.32"	if ordvar == 2 & rownum == 4
* DEINDU panel
replace invislab = "diff -2.00*" if ordvar == 3 & rownum == 1
*replace invislab = "diff -1.89^"	if ordvar == 3 & rownum == 2
replace invislab = "diff -1.14" if ordvar == 3 & rownum == 2
*replace invislab = "diff 2.22"	if ordvar == 3 & rownum == 4

twoway (rcap ci_lower ci_upper rownum, lcolor(black) horizontal                /// confidence-interval whiskers
            by(ordvar, xrescale row(1) legend(off) note("") colfirst ))        /// one panel per regressor
       (scatter rownum coef, by(ordvar)                                         /// point estimates
            mcolor(blue) msymbol(D) msize(medium)                               ///
            mlabel(stars) mlabcolor(blue) mlabposition(6)                       /// stars below the marker
            mlabgap(*.5) mlabsize(medium))                                      ///
       (scatter rownum invis, by(ordvar) msymbol(none)                          /// invisible marker at the upper bound
            mlabel(invislab) mlabcolor(black) mlabposition(3)                   /// "diff" text to its right
            mlabgap(*2) mlabsize(small))                                        ///
       ,                                                                        ///
       ylabel(1 "5.Yrs Educ*Ret"                                                /// row labels (y axis)
              2 "6.Yrs Educ*Ret REG"                                            ///
              , angle(0) nogrid)                                                ///
       xline(0, lpattern(longdash) lwidth(medthin) lcolor(red))                 /// reference line at zero
       subtitle(,bcolor(white) lcol(black))                                     /// panel title boxes
       plotregion(margin(b=+2))                                                 /// extra bottom margin
       plotregion(margin(r=+5))                                                 /// extra right margin
       xlabel(-2 (2) 4)                                                         /// x-axis ticks set by hand
       ytitle("")                                                               /// no y-axis title
       aspect(0.3)                                                              /// aspect ratio
       yscale(reverse)

* The same graph is exported under two file names.
graph export "Figures/tablehumank_part3.png", replace width(3000) height(1908)
graph export "Figures/figure10c.png", replace  width(3000) height(1908)

restore

*****
* Figure 10d (subfigure 4): one panel per regressor (ordvar), one row per
* outcome (rownum). Drawn on a temporary subset of the data (preserve/restore).

preserve

keep if subfigure == 4

* invis: x-position of an invisible marker at the upper CI bound; it only
* carries the text in invislab, printed to the right of the interval.
gen invis = ci_upper
gen invislab = ""

* "diff" labels are typed in by hand, not computed here.
* NOTE(review): presumably copied from the lincom rows of the tables - confirm.
* The commented-out lines are labels for rows that are not plotted.
* NRXGDP panel
replace invislab = "diff -0.03***" if ordvar == 2 & rownum == 1
*replace invislab = "diff 0.00"		if ordvar == 2 & rownum == 
replace invislab = "diff -0.03***" if ordvar == 2 & rownum == 2
*replace invislab = "diff -0.00"		if ordvar == 2 & rownum == 
* DEINDU panel
replace invislab = "diff -0.03" if ordvar == 3 & rownum == 1
*replace invislab = "diff -0.02"		if ordvar == 3 & rownum == 
replace invislab = "diff -0.04" if ordvar == 3 & rownum == 2
*replace invislab = ""	if ordvar == 3 & rownum == 

twoway (rcap ci_lower ci_upper rownum, lcolor(black) horizontal                /// confidence-interval whiskers
            by(ordvar, xrescale row(1) legend(off) note("") colfirst ))        /// one panel per regressor
       (scatter rownum coef, by(ordvar)                                         /// point estimates
            mcolor(blue) msymbol(D) msize(medium)                               ///
            mlabel(stars) mlabcolor(blue) mlabposition(6)                       /// stars below the marker
            mlabgap(*.5) mlabsize(medium))                                      ///
       (scatter rownum invis, by(ordvar) msymbol(none)                          /// invisible marker at the upper bound
            mlabel(invislab) mlabcolor(black) mlabposition(3)                   /// "diff" text to its right
            mlabgap(*2) mlabsize(small))                                        ///
       ,                                                                        ///
       ylabel(1 "7.Return to Experience"                                        /// row labels (y axis)
              2 "8.Ret Exp REG"                                                 ///
              , angle(0) nogrid)                                                ///
       xline(0, lpattern(longdash) lwidth(medthin) lcolor(red))                 /// reference line at zero
       subtitle(,bcolor(white) lcol(black))                                     /// panel title boxes
       plotregion(margin(b=+2))                                                 /// extra bottom margin
       plotregion(margin(r=+5))                                                 /// extra right margin
       ytitle("")                                                               /// no y-axis title
       aspect(0.3)                                                              /// aspect ratio
       yscale(reverse)
*	saving(graphtab9, replace)													// Save for combining later

* The same graph is exported under two file names. The export uses the graph
* held in memory, so it does not depend on the data restored afterwards.
graph export "Figures/tablehumank_part4.png", replace width(3000) height(1908)
graph export "Figures/figure10d.png", replace  width(3000) height(1908)

restore

*****************
*****************
**# FIGURE 8 #1
*****************
*****************

** MAIN VARIABLES COUNTRY LEVEL (N = 116) **
* Builds temp_ctrls: one row per country (year 2000) holding the country-level
* regressors and controls that are merged onto the city-level data below.
use LACdata, clear
** POP **
* Spread the population of selected years to every row of the same ccode.
foreach yr in 1960 2000 2010 2020 {
    gen pop_`yr' = pop if year == `yr'
    bysort ccode: egen pop`yr' = max(pop_`yr')
    drop pop_`yr'
}
gen pop1 = 1
gen pop201020 = pop2010 if year == 2010
replace pop201020 = pop2020 if year == 2020
sum pop201020 pop1960 pop1
* Same spreading for log per-capita GDP.
foreach yr in 1960 2000 2010 2020 {
    gen lpcgdp_`yr' = lpcgdp if year == `yr'
    bysort ccode: egen lpcgdp`yr' = max(lpcgdp_`yr')
    drop lpcgdp_`yr'
}
** NRX **
foreach yr in 2000 {
    foreach sector in ag {
        gen nrxmean = nrx`sector'_mean`yr'
    }
}
** DEINDUSTRIALIZATION **
* Manufacturing share in the benchmark years, spread within country_gjv.
foreach yr in 1980 2000 2020 {
    gen indu_`yr' = mfgshare if year == `yr'
    bysort country_gjv: egen indu`yr' = max(indu_`yr')
    drop indu_`yr'
}
* negchg_1980<end> = size of the fall in the manufacturing share since 1980,
* and 0 when the share did not fall.
* NOTE(review): a missing change also yields 0, because (missing < 0) is false
* in Stata - confirm this is intended.
foreach endyr in 2020 2000 {
    gen chg_1980`endyr' = indu`endyr' - indu1980
    gen neg = (chg_1980`endyr' < 0)
    gen negchg_1980`endyr' = 0
    replace negchg_1980`endyr' = chg_1980`endyr' if neg == 1
    replace negchg_1980`endyr' = -negchg_1980`endyr'
    drop neg
}
* CONTROLS
* xi: reg `V' i.LAC|nrxmean i.LAC|mfgservshare urbrate larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop`Y'] if year == `Y', robust
keep if year == 2000
keep country_wb LAC nrxmean mfgservshare lpcgdp2000 meanlpcgdp19602000 negchg_19802000 urbrate larea larea_sq lpop lpop_sq smallisland type threshold lthreshold_level pop2000
* Sorted on the merge key, as required by the old-style merge that follows.
sort country_wb
save temp_ctrls, replace
count
* 116

** CITY-LEVEL DATA **
* Created in the Mapping folder
use data_for_mapping, clear
* We add the country-level variables (old-style match merge on country_wb)
sort country_wb
merge country_wb using temp_ctrls
tab _m
tab country_wb if _m == 1
tab country_wb if _m == 2
* Only observations found in both files are kept.
drop if _m == 1 | _m == 2
drop _m
* Within each country, order cities from largest to smallest fua_pop and flag
* the largest one, the two largest and the five largest.
gsort country_wb -fua_pop
order country_wb fua_pop
bysort country_wb: gen lar1 = (_n == 1)
bysort country_wb: gen lar2 = (_n <= 2)
bysort country_wb: gen lar5 = (_n <= 5)
ren capital cap
* Short aliases; they give short interaction names in the catsize regressions.
gen nr = nrxmean
gen ms = mfgservshare
gen de = negchg_19802000
* The outcome variables are renamed to the prefix dist2word_ (no "l"), which is
* the spelling the regression loops below refer to.
foreach sector in trade tradeotserv tradehhserv tradebothserv tradebothserv2 {
    ren dist2world_`sector' dist2word_`sector'
}

** MAIN SECTORS **
* For every dist2word* outcome: four city-level regressions (with and without
* DEINDU, with and without controls), weighted by fua_pop and clustered by
* country, each appended as a column to table.xls.
* NOTE(review): table.xls is erased again at the start of the next section.
capture erase "table.xls"
capture erase "table.tex"
capture erase "table.txt"
* Linear combinations handed to proglincom.
local lincomset "mfgservshare-nrxmean"
local lincomset0 "mfgservshare-nrxmean"
local lincomset1 "mfgservshare-negchg_19802000"
* Country-level controls of the "Other controls" specifications.
local geoctrls "larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level"
foreach V of varlist dist2word* {
    * No ctrl *
    xi: reg `V' nrxmean mfgservshare [w=fua_pop], robust clust(country_wb)
    proglincom "`lincomset'"
    outreg2 nrxmean mfgservshare using "table.xls", keep(nrxmean mfgservshare) ///
        addtext(nrxcoef, `r(coefftxt)', nrxse, `r(setxt)') ///
        se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
    * Other controls *
    xi: reg `V' nrxmean mfgservshare `geoctrls' [w=fua_pop], robust clust(country_wb)
    proglincom "`lincomset'"
    outreg2 nrxmean mfgservshare using "table.xls", keep(nrxmean mfgservshare) ///
        addtext(nrxcoef, `r(coefftxt)', nrxse, `r(setxt)') ///
        se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
    ** DEINDU **
    * No ctrl *
    xi: reg `V' nrxmean mfgservshare negchg_19802000 [w=fua_pop], robust clust(country_wb)
    proglincom "`lincomset0'"
    local coeff1 = r(coefftxt)
    local se1 = r(setxt)
    proglincom "`lincomset1'"
    local coeff2 = r(coefftxt)
    local se2 = r(setxt)
    outreg2 * using "table.xls", keep(*nrx* *mfgserv* negchg_19802000) ///
        addtext(nrxcoef, "`coeff1'", nrxse, "`se1'", mfgservcoef, "`coeff2'", mfgservse, "`se2'") ///
        se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
    * Other controls *
    xi: reg `V' nrxmean mfgservshare negchg_19802000 `geoctrls' [w=fua_pop], robust clust(country_wb)
    proglincom "`lincomset0'"
    local coeff1 = r(coefftxt)
    local se1 = r(setxt)
    proglincom "`lincomset1'"
    local coeff2 = r(coefftxt)
    local se2 = r(setxt)
    outreg2 * using "table.xls", keep(*nrx* *mfgserv* negchg_19802000) ///
        addtext(nrxcoef, "`coeff1'", nrxse, "`se1'", mfgservcoef, "`coeff2'", mfgservse, "`se2'") ///
        se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
}

** MAIN SECTORS BY CATSIZE **
* Same regressions with every regressor interacted with the city-size category
* (i.catsize|x). Only the last specification is written to table.xls. A final
* regression on the short aliases nr/ms/de is stored with regsave in
* coefonly_<outcome>, which the next section reads.
capture erase "table.xls"
capture erase "table.tex"
capture erase "table.txt"
capture erase "table_coefonly.xls"
capture erase "table_coefonly.tex"
capture erase "table_coefonly.txt"
* Linear combinations handed to proglincom.
local lincomset "mfgservshare-nrxmean"
local lincomset0 "mfgservshare-nrxmean"
local lincomset1 "mfgservshare-negchg_19802000"
* Country-level controls of the "Other controls" specifications.
local geoctrls "larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level"
foreach V of varlist dist2word_mfgfire dist2word_trade* {
    * No ctrl *
    xi: reg `V' i.catsize|nrxmean i.catsize|mfgservshare [w=fua_pop], robust clust(country_wb)
    proglincom "`lincomset'"
    * Other controls *
    xi: reg `V' i.catsize|nrxmean i.catsize|mfgservshare `geoctrls' [w=fua_pop], robust clust(country_wb)
    proglincom "`lincomset'"
    ** DEINDU **
    * No ctrl *
    xi: reg `V' i.catsize|nrxmean i.catsize|mfgservshare i.catsize|negchg_19802000 [w=fua_pop], robust clust(country_wb)
    proglincom "`lincomset0'"
    local coeff1 = r(coefftxt)
    local se1 = r(setxt)
    proglincom "`lincomset1'"
    local coeff2 = r(coefftxt)
    local se2 = r(setxt)
    * Other controls *
    xi: reg `V' i.catsize|nrxmean i.catsize|mfgservshare i.catsize|negchg_19802000 `geoctrls' [w=fua_pop], robust clust(country_wb)
    proglincom "`lincomset0'"
    local coeff1 = r(coefftxt)
    local se1 = r(setxt)
    proglincom "`lincomset1'"
    local coeff2 = r(coefftxt)
    local se2 = r(setxt)
    outreg2 * using "table.xls", keep(*nrx* *mfgserv* negchg_19802000.0 _IcatX*) ///
        addtext(nrxcoef, "`coeff1'", nrxse, "`se1'", mfgservcoef, "`coeff2'", mfgservse, "`se2'") ///
        se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
    * Same specification on the aliases nr/ms/de; all coefficients are stored.
    xi: reg `V' i.catsize|nr i.catsize|ms i.catsize|de `geoctrls' [w=fua_pop], robust clust(country_wb)
    regsave using coefonly_`V', replace pval ci
}

* For each outcome, turn the stored catsize regression into one file
* agcoef_<outcome> with one row per category (cat) and one column per
* regressor (coef_nr, coef_ms, coef_de).
foreach S in mfgfire trade tradeotserv tradehhserv tradebothserv tradebothserv2 {
    foreach reg in nr ms de {
        use coefonly_dist2word_`S', clear
        * Only the first 27 stored rows are used.
        * NOTE(review): presumably 9 rows for each of nr, ms and de - confirm.
        keep if _n <= 27
        * Regressor code = characters 4 and 3 from the end of the row name; when
        * that is empty, the row name itself is used.
        gen type = substr(var,-4,2)
        replace type = var if type == ""
        tab type
        keep if type == "`reg'"
        gen cat = _n
        * The coefficient in row 1 is added to every other row of this
        * regressor; row 1 itself is left unchanged.
        gen coef1 = coef if cat == 1
        egen maxcoef1 = max(coef1)
        replace maxcoef1 = 0 if cat == 1
        gen coef_`reg' = coef+maxcoef1
        keep cat coef_`reg'
        sort cat
        save coef_`reg', replace
    }
    * Put the three regressors side by side, matched on cat.
    use coef_nr, clear
    foreach reg in ms de {
        sort cat
        merge cat using coef_`reg'
        drop _m
    }
    sort cat
    save agcoef_`S', replace
}

* FIGURE 8A: MFGFIRE
* One line per regressor (MFGSERV, NRXGDP, DEINDU) across city-size categories.
use agcoef_mfgfire, clear
twoway (line coef_ms cat, lcolor(black) lwidth(vthick)) ///
       (line coef_nr cat, lcolor(black) lwidth(vthick) lpattern(dash)) ///
       (line coef_de cat, lcolor(gs8) lwidth(vthick)), ///
       legend(order(1 "MFGSERV" 2 "NRXGDP" 3 "DEINDU") row(1) position(6)) ///
       xlabel(1(1)9) ///
       xtitle(City Population Size Category, margin(medsmall)) ///
       ytitle(Employment Share (Relative to the World), margin(medsmall)) ///
       graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white) margin(small)) ///
       ylabel(-0.6(0.2)0.4)
* Exported twice: once to the working directory, once to the Figures folder.
graph export fig_world_mfgfire.png, replace width(2620) height(1908)
graph export "Figures\figure8A.png", replace width(2620) height(1908)

* FIGURE 8B: NTR2
* Same layout for the tradeotserv outcome; only the y-axis ticks differ.
use agcoef_tradeotserv, clear
twoway (line coef_ms cat, lcolor(black) lwidth(vthick)) ///
       (line coef_nr cat, lcolor(black) lwidth(vthick) lpattern(dash)) ///
       (line coef_de cat, lcolor(gs8) lwidth(vthick)), ///
       legend(order(1 "MFGSERV" 2 "NRXGDP" 3 "DEINDU") row(1) position(6)) ///
       xlabel(1(1)9) ///
       xtitle(City Population Size Category, margin(medsmall)) ///
       ytitle(Employment Share (Relative to the World), margin(medsmall)) ///
       graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white) margin(small)) ///
       ylabel(-0.1(0.2)1.5)
graph export "Figures\figure8B.png", replace width(2620) height(1908)

*****************
*****************
**# TABLE D7 #2
*****************
*****************

* Years schools in ipums
* Lookup of census -> type_census, saved as temp for the merge further below.
use "dataset_education_a65_ipums_19032022", clear
keep census country year type_census
keep census type_census
sort census
save temp, replace

* Country-by-period panel of regressors (listobs), with lags.
use LACdata, clear
tab year
* We keep every 5 years
keep country_gjv year LAC regionshort nrxag_sh_gdp mfgshare mfgservshare urbrate lpcgdp pop lpop lpop_sq 
** DEINDUSTRIALIZATION **
* Manufacturing share in 1980, spread to every row of the country.
foreach base in 1980 {
    gen indu_`base' = mfgshare if year == `base'
    bysort country_gjv: egen indu`base' = max(indu_`base')
    drop indu_`base'
}
* negchg_1980t = size of the fall in the manufacturing share relative to 1980;
* 0 when the share did not fall, and 0 for years up to 1980.
gen indu = mfgshare
gen chg_1980t = indu - indu1980
gen neg = (chg_1980t < 0)
gen negchg_1980t = 0
replace negchg_1980t = chg_1980t if neg == 1
replace negchg_1980t = -negchg_1980t
replace negchg_1980t = 0 if year <= 1980
sum negchg_1980t, d
ren year period
* lag10/20/30/40 = value 2/4/6/8 rows earlier within country.
* NOTE(review): this equals 10-40 years only if rows are 5 years apart with no
* gaps - confirm.
foreach X in nrxag_sh_gdp mfgservshare negchg_1980t {
    sort country_gjv period
    foreach lagyrs in 10 20 30 40 {
        local back = `lagyrs'/5
        by country_gjv: gen lag`lagyrs'`X' = `X'[_n-`back']
    }
}
sort country_gjv period
save listobs, replace

** Human capital - IPUMS ** 
* Schooling file restricted to samples whose type_census is "census", saved as
* a country-year panel (yrschool_panel).
use "yrschool.dta", clear
sort census
merge census using temp
tab _m
tab census if _m == 1
* Rows that exist only in the lookup file are discarded.
keep if _m != 2
drop _m
tab type_census, m
keep if type_census == "census"
count
codebook country
* 151 54
keep country year a65 a45
destring year, replace
* Two country names are changed before the merge on country and year below.
replace country = "Venezuela, RB" if country == "Venezuela"
replace country = "Papua New Guinea" if country == "Papua New"
sort country year
save yrschool_panel, replace
count
* 151

* We combine with the "super" IPUMS data set
* Every row of either file is kept; _m is tabulated and then dropped.
use superipums, clear
sort country year
merge country year using yrschool_panel
tab _m
drop _m
save superipums2, replace

***** RESULTS *****
* Panel regressions of sectoral employment variables on lagged NRX, MFGSERV
* and DEINDU with country fixed effects; one column per outcome is appended to
* tables\tableD7.xls.

use superipums2, clear
sort country_gjv year
gen nrx_t = agri_t + mining_t
codebook nrx_t
*order country* year nrx_t agri_t mining_t
keep if nrx_t != .
count
* 182
* Sample years are rounded to the nearest multiple of 5 to match the periods
* of listobs.
gen period = round(year,5)
tab year period
sort country_gjv period 
merge country_gjv period using listobs
tab _m
drop if _m == 2
drop _m
* Gap in years between consecutive samples of the same country.
sort country_gjv year
bysort country_gjv: gen diffyear = year - year[_n-1]
sum diffyear [w=pop]
* 7
** MFG+FIRE **
egen fire_t = rsum(finance_t buss_service_t)
gen mfgfire_t = mfg_t + fire_t
** TRADE **
egen trade2_t = rsum(trade_t other_services_t) if trade_t != . | other_services_t != .
egen trade3_t = rsum(trade_t other_services_t household_serv_t) if trade_t != . | other_services_t != . | household_serv_t != .
** GVT **
gen govmt2_t = govmt_t+educ_t+health_t 

** SELECTED TABLE - SECTORAL L **
capture erase "tables\tableD7.xls"
capture erase "tables\tableD7.tex"
capture erase "tables\tableD7.txt"
* Linear combinations handed to proglincom after each regression.
local lincomset1 "lag10nrxag_sh_gdp-mfgservshare"
local lincomset2 "negchg_1980t-mfgservshare"
foreach G in t {
    foreach outcome of varlist mfg_t fire_t mfgfire_t trade_t trade2_t trade3_t govmt_t govmt2_t nrx_t construct_t wagewker_t selfwker_t unpothwker_t {
        desc `outcome'
        * sumcount = number of non-missing observations of the outcome per country
        gen count = 1 if `outcome' != . 
        bysort country_gjv: egen sumcount = sum(count)
        * minobs = minimum number of observations a country needs to be included
        foreach minobs in 3 {
            xi: areg `outcome' lag10nrxag_sh_gdp mfgservshare urbrate negchg_1980t lpop lpop_sq i.period [w=pop] if sumcount >= `minobs', robust absorb(country_gjv) cluster(country_gjv)
            regsave lag10nrxag_sh_gdp mfgservshare negchg_1980t using table3_panel_`outcome', replace ci level(90) pval
            * Years covered by the estimation sample
            gen sample = e(sample)
            sum year if sample == 1
            drop sample
            * proglincom returns r(coefftxt) (estimate + stars) and r(setxt) ("[se]").
            proglincom "`lincomset1'"
            local coeff1 = r(coefftxt)
            local se1 = r(setxt)
            proglincom "`lincomset2'"
            local coeff2 = r(coefftxt)
            local se2 = r(setxt)
            outreg2 * using "tables\tableD7.xls", keep(*nrx* *mfgserv* negchg_1980t) ///
                addtext(NRXGDP, "`coeff1'", nrxse, "`se1'", DEINDU, "`coeff2'", deinduse, "`se2'", number,`minobs') ///
                se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
        }
        drop count sumcount
    }
}

****************
**# FIGURE D6 #1
****************
* Scatter plots (with linear fit) comparing the MFG+services measures with the
* MFG+FIRE measures across countries in 2020.

use LACdata, clear
keep if year == 2020
* We create MFG+FIRE
gen mfgfireshare = mfgshare+fire2020_ma5_un if mfgshare != . & fire2020_ma5_un != . 
corr servshare fire2020_ma5_un if year == 2020
* 0.74
corr servshare fire2020_ma5_un [w=pop] if year == 2020
* 0.67
corr mfgservshare mfgfireshare if year == 2020
* 0.78
corr mfgservshare mfgfireshare [w=pop] if year == 2020
* 0.80
codebook if fire2020_ma5_un != .
* 78 out of 116

* MFGFIRE VS MFGSERV
twoway (scatter mfgservshare mfgfireshare if year == 2020,  mcolor(black) msize(medium)) ///
       (lfit mfgservshare mfgfireshare if year == 2020, lcolor(black) lpattern(dash) lwidth(medthick)), ///
       xtitle(Share of MFG + FIRE in GDP (%) c. 2020, margin(small)) ///
       ytitle(Share of MFG + Services in GDP (%) c. 2020, margin(small)) ///
       legend(off) ///
       graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white) margin(small)) ///
       ylabel(40(20)100) title(MFG + SERVICES vs. MFG + FIRE)
* FIX: the .gph file loaded here is never written by this script. An
* unconditional -graph use- stops the do-file with r(601) when the file is
* missing. Load it only if it exists; otherwise export the graph just drawn.
* NOTE(review): presumably the .gph is a hand-edited version of this graph -
* confirm, or replace this with a saving() option on the twoway command.
capture confirm file "fig_mfgserv_mfgfire.gph"
if _rc == 0 {
    graph use "fig_mfgserv_mfgfire.gph"
}
else {
    display as text "note: fig_mfgserv_mfgfire.gph not found; exporting the graph drawn above"
}
graph export "figures\fig_mfgserv_mfgfire.png", replace width(2620) height(1908)
graph export "figures\figureD6A.png", replace width(2620) height(1908)

* FIRE VS SERV
twoway (scatter servshare fire2020_ma5_un if year == 2020, mcolor(black) msize(medium)) ///
       (lfit servshare fire2020_ma5_un if year == 2020, lcolor(black) lpattern(dash) lwidth(medthick)), ///
       xtitle(Share of FIRE in GDP (%) c. 2020, margin(small)) ///
       ytitle(Share of Services in GDP (%) c. 2020, margin(small)) ///
       legend(off) ///
       graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white) margin(small)) ///
       ylabel(40(20)80) xlabel(0(10)30) title(SERVICES vs. FIRE)
* Same guard as above for the second saved graph.
capture confirm file "fig_serv_fire.gph"
if _rc == 0 {
    graph use "fig_serv_fire.gph"
}
else {
    display as text "note: fig_serv_fire.gph not found; exporting the graph drawn above"
}
graph export "figures\fig_serv_fire.png", replace width(2620) height(1908)
graph export "figures\figureD6B.png", replace width(2620) height(1908)

*************************************************************
*** WEB APPENDIX TABLE 7 - I2D2 - CROSS-SECTIONAL RESULTS ***
*************************************************************

*** LIST OF COUNTRIES ***
* The same list of countries (year 2020 rows of LACdata) is saved twice:
* LAC_code sorted by country_wb and LAC_code2 sorted by ccode, so that either
* can be the using file of an old-style merge on its sort key.
foreach spec in "country_wb LAC_code" "ccode LAC_code2" {
    local sortkey : word 1 of `spec'
    local outfile : word 2 of `spec'
    use LACdata, clear
    keep if year == 2020
    keep ccode country_gjv country_wb
    sort `sortkey'
    save `outfile', replace 
    count
    * 116
}

*** CCODE ***
* Lookup ccode -> country_wb from the World Development Indicators extract.
clear
import excel "Data_Extract_From_World_Development_Indicators (19).xlsx", sheet("Data") firstrow clear
keep country_wb ccode
* Only the first 217 rows of the sheet are kept.
keep if _n <= 217
* Seven codes are replaced by alternative codes.
* NOTE(review): presumably the codes used in the I2D2 files that are merged on
* ccode - confirm.
replace ccode = "ADO" if ccode == "AND"
replace ccode = "IMY" if ccode == "IMN"
replace ccode = "ROM" if ccode == "ROU"
replace ccode = "TMP" if ccode == "TLS"
replace ccode = "WBG" if ccode == "PSE"
replace ccode = "ZAR" if ccode == "COD"
replace ccode = "KSV" if ccode == "XKX"
sort ccode
save wbccode, replace

*** SUPER-IPUMS *** 
* Attach ccode and country_wb to the super-IPUMS data via country_gjv.
* First the lookup, sorted on the merge key:
use LACdata, clear
keep if year == 2020
keep ccode country_gjv country_wb
sort country_gjv
save ccode2, replace 
* Then the merge; countries that appear only in the lookup are discarded.
use superipums, clear
sort country_gjv
merge country_gjv using ccode2
tab _m
keep if _m != 2
drop _m
* Source flag
gen ipums = 1
sort country_wb year
* This replaces any superipums2 file saved earlier in the do-file.
save superipums2, replace
count
* 207
codebook country_wb
sum year
* 1960-2015

*** I2D2 - INDUSTRY ***
* Created from I2D2 (see the I2D2 folder)
* Reshapes the industry shares to one row per sample (variables sh<code>),
* attaches country names and keeps the countries present in LAC_code.
use "i2d2_indu_all.dta", clear
drop count
tab industry
* Short text code for each of the 11 industry values (1 = agr, ..., 11 = other);
* it becomes the suffix of the reshaped share variables.
local inducodes "agr min mfg pu con trade tc fire admin otserv other"
gen indu = ""
forvalues k = 1/11 {
    local code : word `k' of `inducodes'
    replace indu = "`code'" if industry == `k'
}
drop industry
reshape wide sh, i(sample sumcount) j(indu) string
* Check on the sum of the shares within each row
egen test = rsum(sh*)
sum test, d
* ok
drop test
* Country names from the ccode lookup
sort ccode
merge ccode using wbccode
tab _m
keep if _m != 2
drop _m
codebook country_wb
replace country_wb = "Côte d'Ivoire" if country_wb == "Cote d'Ivoire"
* Keep only observations matched to the country list
sort country_wb
merge country_wb using LAC_code
drop if _m == 1 | _m == 2
drop _m
sort country_wb year
codebook country_wb
sum year 
codebook country_wb
* Lists the samples whose name contains "cens" after its first character
gen census = strpos(sample, "cens")
tab census
tab sample if census > 1
drop census
collapse (mean) sh*, by(sample ccode year country_wb sumcount country_gjv)
* Source flag
gen i2d2 = 1
ren sumcount obsindu
sort country_wb year
save i2d2_indu_all2, replace

*** I2D2 - EMPLOYMENT STATUS - MEAN PER YEAR ***
* Created from I2D2 (see the I2D2 folder)
* Sums the shares by employment-status code, reshapes to one share column per code, attaches
* country_wb through wbccode, keeps only the countries that are also in LAC_code, and saves
* i2d2_empstat_all2 sorted by country_wb year.
use "i2d2_empstat_all.dta", clear
drop count
tab empstat, m
tab empstat, m nol
* 1 paid
* 2 non-paid
* 3 employer
* 4 self-employed
* 5 other
* 99 missing
* Text code for each employment-status value (same position in the two lists)
gen indu = ""
local statVals  "1 2 3 4 5 99"
local statCodes "paid unpaid empl self emot emmis"
forvalues pos = 1/6 {
	local statVal  : word `pos' of `statVals'
	local statCode : word `pos' of `statCodes'
	replace indu = "`statCode'" if empstat == `statVal'
}
tab indu, m
tab indu, m nol
collapse (sum) sh (max) sumcount, by(sample ccode year indu)
reshape wide sh, i(sample sumcount) j(indu) string
* Row total of the shares, inspected only (the variable is dropped right after)
egen test = rsum(sh*)
sum test, d
drop test
* Add country_wb; rows that exist only in wbccode are dropped
sort ccode
merge ccode using wbccode
tab _m
drop if _m == 2
drop _m
codebook country_wb
replace country_wb = "Côte d'Ivoire" if country_wb == "Cote d'Ivoire"
* Keep only the rows matched with LAC_code
sort country_wb
merge country_wb using LAC_code
keep if _m == 3
drop _m
sort country_wb year
codebook country_wb
sum year  
codebook country_wb
* Diagnostic listing of the samples whose name contains "cens".
* strpos() returns 0 when the substring is absent and its position (1 or more) when it is
* present, so the test is > 0; a test of > 1 would skip names that start with "cens".
gen census = strpos(sample, "cens")
tab census
tab sample if census > 0
drop census
collapse (mean) sh*, by(sample ccode year country_wb sumcount country_gjv)
gen i2d2 = 1
ren sumcount obsempstat
sort country_wb year
save i2d2_empstat_all2, replace

*** I2D2 - FORMAL STATUS - MEAN PER YEAR ***
* Created from I2D2 (see the I2D2 folder)
* Attaches country_wb through wbccode, keeps only the countries that are also in LAC_code,
* and saves i2d2_formal_all2 sorted by country_wb year.
use "i2d2_formal_all.dta", clear
* Add country_wb; rows that exist only in wbccode are dropped
sort ccode
merge ccode using wbccode
tab _m
drop if _m == 2
drop _m
codebook country_wb
replace country_wb = "Côte d'Ivoire" if country_wb == "Cote d'Ivoire"
* Keep only the rows matched with LAC_code
sort country_wb
merge country_wb using LAC_code
keep if _m == 3
drop _m
sort country_wb year
codebook country_wb
sum year 
codebook country_wb
* Diagnostic listing of the samples whose name contains "cens".
* strpos() returns 0 when the substring is absent and its position (1 or more) when it is
* present, so the test is > 0; a test of > 1 would skip names that start with "cens".
gen census = strpos(sample, "cens")
tab census
tab sample if census > 0
drop census
collapse (mean) sh*, by(sample ccode year country_wb sumcount country_gjv)
gen i2d2 = 1
ren sumcount obsformal
sort country_wb year
save i2d2_formal_all2, replace

*** WE MERGE ***
* (1) List of years read from yearsall.xlsx and saved sorted by year.
clear
import excel "yearsall.xlsx", sheet("Sheet1") firstrow clear
sort year
save yearsall, replace
* (2) Total population from a WDI extract: the first 217 rows are kept and the y* columns
*     are stacked into a country_wb x year file.
clear
import excel "Data_Extract_From_World_Development_Indicators.xlsx", sheet("Data") firstrow clear
keep if _n < 218
reshape long y, i(ccode) j(year) string
rename y totpop
drop ccode
destring year, replace
replace country_wb = "Côte d'Ivoire" if country_wb == "Cote d'Ivoire"
sort country_wb year
save totpop, replace

*** I2D2 INDUSTRY / TYPE OF EMPLOYMENT / FORMAL - MEAN FROM 1990 ONWARDS ***
* The same steps are run for the three I2D2 files. Word k of each list below belongs to
* run k: the file merged in, the share variables that are averaged, the suffix of the
* i2d2year variable, and the output file.
local i2Sources "i2d2_indu_all2 i2d2_empstat_all2 i2d2_formal_all2"
local i2Shares  "shadmin-shtrade shemmis-shunpaid sh*"
local i2Tags    "I E F"
local i2Outputs "i2d2indu1990s2010s i2d2empl1990s2010s i2d2form1990s2010s"
forvalues pos = 1/3 {
	local i2Source : word `pos' of `i2Sources'
	local i2Share  : word `pos' of `i2Shares'
	local i2Tag    : word `pos' of `i2Tags'
	local i2Output : word `pos' of `i2Outputs'
	* Country x year grid of the LAC_code countries with total population attached
	use LAC_code, clear
	cross using yearsall
	sort country_wb year
	merge country_wb year using totpop
	tab country_wb if _m == 1
	drop if _m == 2
	tab _m
	drop _m
	* 2019 population copied to every year of the country
	gen pop2019 = totpop if year == 2019
	bysort ccode: egen poptoday = max(pop2019)
	drop pop2019
	* Add the I2D2 rows and keep those from 1990 onwards
	sort country_wb year
	merge country_wb year using `i2Source'
	tab _m
	drop _m
	keep if year >= 1990 & i2d2 == 1
	sum year, d
	* Weighted mean per country; obs is an abbreviation that must match a single obs*
	* variable of the merged file
	collapse (mean) `i2Share' year [w=obs], by(country_wb ccode poptoday)
	codebook country_wb
	sum year, d
	sum year [w=poptoday], d
	keep country_wb `i2Share' year
	ren year i2d2year`i2Tag'
	sort country_wb
	save `i2Output', replace
}

***** REGRESSIONS *****

use LACdata, clear
** POP **
* Population of selected years copied to every row of the country
foreach yr in 1960 2000 2010 2020 {
	gen pop_`yr' = pop if year == `yr'
	bysort ccode: egen pop`yr' = max(pop_`yr')
	drop pop_`yr'
}
gen pop1 = 1
* Population of the row's own year, defined for 2010 and 2020 only
gen pop201020 = cond(year == 2010, pop2010, cond(year == 2020, pop2020, .))
sum pop201020 pop1960 pop1
* Same construction for log per capita GDP
foreach yr in 1960 2000 2010 2020 {
	gen lpcgdp_`yr' = lpcgdp if year == `yr'
	bysort ccode: egen lpcgdp`yr' = max(lpcgdp_`yr')
	drop lpcgdp_`yr'
}
** DEINDUSTRIALIZATION **
* Manufacturing share of selected years copied to every row of the country
foreach yr in 1980 2000 2010 2020 {
	gen indu_`yr' = mfgshare if year == `yr'
	bysort country_gjv: egen indu`yr' = max(indu_`yr')
	drop indu_`yr'
}
* chg_1980YYYY is the change in the manufacturing share since 1980. negchg_1980YYYY is
* that change with the sign flipped when it is negative, and 0 otherwise.
* NOTE(review): a missing change is not below 0 in Stata, so it also yields 0 - confirm
* that this is intended.
* The helper neg is dropped after 2020 and 2010 but stays in the data after 2000.
foreach yr in 2020 2010 2000 {
	gen chg_1980`yr' = indu`yr' - indu1980
	gen neg = (chg_1980`yr' < 0)
	gen negchg_1980`yr' = 0
	replace negchg_1980`yr' = chg_1980`yr' if neg == 1
	replace negchg_1980`yr' = -negchg_1980`yr'
	if `yr' != 2000 {
		drop neg
	}
}
** ADDING THE I2D2-BASED VARIABLES **
foreach src in indu empl form {
	sort country_wb
	merge country_wb using i2d2`src'1990s2010s
	tab _m
	drop _m
}
* Every variable whose name starts with sh is multiplied by 100
foreach shv of varlist sh* {
	replace `shv' = `shv'*100
}
** SECTORS **
** MFG+FIRE **
* Agri, Mining, Manuf, Public util, Const, Commerce, Transport and Comm, Fire, Public Admin, Other Serv, Other
gen shmfgfire = shmfg+shfire
gen ipuI_mfgfire_t = ipuI_mfg_t + ipuI_finance_t + ipuI_buss_service_t
** TRADE **
* Trade includes hospitality
gen ipuI_trade2_t = ipuI_trade_t + ipuI_hospitality_t
gen ipuI_trade3_t = ipuI_trade_t + ipuI_hospitality_t + ipuI_other_services_t
* Trade + other services
gen shtrade2 = shtrade+shotserv
** GVT **
gen ipuI_govmt2_t = ipuI_govmt_t + ipuI_educ_t + ipuI_health_t

** AGRI+MIN **
gen shagrmin = shagr+shmin
** OTHER2 **
egen shother2 = rsum(shother shotserv) if shother != . | shotserv != . 
** TEST REMOVING SHOTHER **
* Each share is rescaled by 100/(100-shother); where shother is missing or 0 the share is
* kept as it is
foreach shv of varlist shadmin-shtrade shagrmin shmfgfire shtrade2 {
	gen `shv'test = cond(shother == . | shother == 0, `shv', `shv'/(100-shother)*100)
	sum `shv' `shv'test
}
drop shothertest 
** EMPL **
egen shemot2 = rsum(shemot shemmis) if shemot != . | shemmis != .
egen shunpoth = rsum(shunpaid shemot) if shunpaid != . | shemot != .
egen shunpoth2 = rsum(shunpaid shemot2) if shunpaid != . | shemot2 != .

* MFG-FIRE *
gen mfgfireshare = mfgshare+fire2020_ma5_un if mfgshare != . & fire2020_ma5_un != . 

*** PANEL B ***
*** SECTORS + TYPE OF EMPLOYMENT ***
* [NOTE TO MYSELF: 2000 AND 2010 GOOD FOR DIFFERENT REASONS - USE BOTH]
* For each I2D2 share, runs a weighted robust regression on the 2010 rows, stores the three
* coefficients of interest with regsave (90 percent CI and p-values) in table3_i2d2_V,
* computes two linear combinations with proglincom, and appends the column to tableD8B.
* The single-value loops over G, Y and X only set the year (2010) and the nrx variant (ag).
foreach G in t {
capture erase "tables\tableD8B.xls"
capture erase "tables\tableD8B.tex"
capture erase "tables\tableD8B.txt"
* NOTE(review): lincomset is defined here but not used inside this loop.
local lincomset "mfgservshare-nrxmean"
foreach V of varlist shmfg shfire shmfgfire shtrade shtrade2 shadmin shagrmin shcon shpaid shself shunpaid  {
foreach Y in 2010 {
foreach X in ag {
gen nrxmean = nrx`X'_mean`Y'
*** DEINDUSTRIALIZATION ***
local lincomset0 "nrxmean - mfgservshare"
local lincomset1 "negchg_1980`Y' - mfgservshare"
* Urbrate ctrl only *
xi: reg `V' nrxmean mfgservshare negchg_1980`Y' urbrate larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop`Y'] if year == `Y', robust
* The regsave variable list hard-codes negchg_19802010, which matches the regressor only
* because Y is 2010.
regsave nrxmean mfgservshare negchg_19802010 using table3_i2d2_`V', replace ci level(90) pval
* coeff1 and se1: nrxmean minus mfgservshare. coeff2 and se2: negchg minus mfgservshare.
proglincom "`lincomset0'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset1'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
* NOTE(review): coeff3, se3, N and O are not assigned anywhere in this loop, so the
* diffcoef, diffse, number and number2 cells carry whatever these locals hold at this
* point of the do-file (possibly nothing) - confirm.
outreg2 * using "tables\tableD8B.xls", keep(*nrx* *mfgserv* negchg_1980`Y') addtext(nrxcoef, "`coeff1'", nrxse, "`se1'", mfgservcoef, "`coeff2'", mfgservse, "`se2'", diffcoef, "`coeff3'", diffse, "`se3'", number,`N', number2,`O') se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
drop nrxmean
}
}
}
}
*** PANEL B MFG-FIRE ***
* Same loop as Panel B with mfgfireshare in place of mfgservshare as regressor. The columns
* are appended to tableD8B_robfire and no regsave file is written here.
foreach G in t {
capture erase "tables\tableD8B_robfire.xls"
capture erase "tables\tableD8B_robfire.tex"
capture erase "tables\tableD8B_robfire.txt"
* NOTE(review): lincomset is defined here but not used inside this loop.
local lincomset "mfgfireshare-nrxmean"
foreach V of varlist shmfg shfire shmfgfire shtrade shtrade2 shadmin shagrmin shcon shpaid shself shunpaid  {
foreach Y in 2010 {
foreach X in ag {
gen nrxmean = nrx`X'_mean`Y'
*** DEINDUSTRIALIZATION ***
local lincomset0 "nrxmean - mfgfireshare"
local lincomset1 "negchg_1980`Y' - mfgfireshare"
* Urbrate ctrl only *
xi: reg `V' nrxmean mfgfireshare negchg_1980`Y' urbrate larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop`Y'] if year == `Y', robust
* coeff1 and se1: nrxmean minus mfgfireshare. coeff2 and se2: negchg minus mfgfireshare.
proglincom "`lincomset0'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset1'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
* NOTE(review): coeff3, se3, N and O are not assigned anywhere in this loop, so the
* diffcoef, diffse, number and number2 cells carry whatever these locals hold at this
* point of the do-file (possibly nothing) - confirm.
outreg2 * using "tables\tableD8B_robfire.xls", keep(*nrx* mfgfireshare negchg_1980`Y') addtext(nrxcoef, "`coeff1'", nrxse, "`se1'", mfgservcoef, "`coeff2'", mfgservse, "`se2'", diffcoef, "`coeff3'", diffse, "`se3'", number,`N', number2,`O') se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
drop nrxmean
}
}
}
}

* The regression data are parked in temp while the coefficient files are stacked.
save temp, replace

* We combine the coefficients into one dta:
* each regsave file gets a depvar column holding the name of its dependent variable,
* then the first file is opened and the others are appended to it.
local d8Depvars "shmfg shfire shmfgfire shtrade shtrade2 shadmin shagrmin shcon shpaid shself shunpaid"
foreach dv of local d8Depvars {
	use table3_i2d2_`dv', clear
	gen depvar = "`dv'"
	save table3_i2d2_`dv'_2, replace 
}
gettoken d8First d8Rest : d8Depvars
use table3_i2d2_`d8First'_2, clear
foreach dv of local d8Rest {
	append using table3_i2d2_`dv'_2
}
save "intermediary\tabled5\table3coefs_i2d2.dta", replace

* Back to the regression data
use temp, clear

*** PANEL A ***
*** SECTORS + TYPE OF EMPLOYMENT ***
* [NOTE TO MYSELF: 2000 AND 2010 GOOD FOR DIFFERENT REASONS - USE BOTH]
* Same loop as Panel B run on the 2000 rows (Y is 2000); the columns go to tableD8A.
* NOTE(review): regsave writes to the same table3_i2d2_V files as Panel B and replaces
* them. The stacked file table3coefs_i2d2 was saved before this loop, so it holds the
* 2010 coefficients; the per-variable files hold the 2000 ones after this loop.
foreach G in t {
capture erase "tables\tableD8A.xls"
capture erase "tables\tableD8A.tex"
capture erase "tables\tableD8A.txt"
* NOTE(review): lincomset is defined here but not used inside this loop.
local lincomset "mfgservshare-nrxmean"
foreach V of varlist shmfg shfire shmfgfire shtrade shtrade2 shadmin shagrmin shcon shpaid shself shunpaid  {
foreach Y in 2000 {
foreach X in ag {
gen nrxmean = nrx`X'_mean`Y'
*** DEINDUSTRIALIZATION ***
local lincomset0 "nrxmean - mfgservshare"
local lincomset1 "negchg_1980`Y' - mfgservshare"
* Urbrate ctrl only *
xi: reg `V' nrxmean mfgservshare negchg_1980`Y' urbrate larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop`Y'] if year == `Y', robust
* The regsave variable list hard-codes negchg_19802000, which matches the regressor only
* because Y is 2000.
regsave nrxmean mfgservshare negchg_19802000 using table3_i2d2_`V', replace ci level(90) pval
* coeff1 and se1: nrxmean minus mfgservshare. coeff2 and se2: negchg minus mfgservshare.
proglincom "`lincomset0'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset1'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
* NOTE(review): coeff3, se3, N and O are not assigned anywhere in this loop, so the
* diffcoef, diffse, number and number2 cells carry whatever these locals hold at this
* point of the do-file (possibly nothing) - confirm.
outreg2 * using "tables\tableD8A.xls", keep(*nrx* *mfgserv* negchg_1980`Y') addtext(nrxcoef, "`coeff1'", nrxse, "`se1'", mfgservcoef, "`coeff2'", mfgservse, "`se2'", diffcoef, "`coeff3'", diffse, "`se3'", number,`N', number2,`O') se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
drop nrxmean
}
}
}
}

***** FIGURE 7 *****

* Preparing data * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

use "intermediary\tabled5\table3coefs_i2d2.dta", clear

* Stars for each coefficient: one star is added for every threshold the p-value is below
gen stars = ""
foreach cut in .10 .05 .01 {
	replace stars = stars + "*" if pval < `cut'
}

* Position of each coefficient inside its panel (rownum) and the panel it belongs to
* (panelnum): the eight sector outcomes are rows 1-8 of panel 1, every other outcome goes
* to panel 2
gen rownum   = 0
gen panelnum = 1

local f7Sectors "shmfgfire shmfg shfire shtrade shtrade2 shadmin shagrmin shcon"
forvalues pos = 1/8 {
	local f7Sector : word `pos' of `f7Sectors'
	replace rownum = `pos' if depvar == "`f7Sector'"
}

replace panelnum = 2 if rownum == 0

* Panel 2 shows shpaid and shself; the shunpaid rows are removed
replace rownum = 1 if depvar == "shpaid"
replace rownum = 2 if depvar == "shself"
drop if depvar == "shunpaid"

* Display names of the three regressors
replace var = "MFGSERV 2010"     if var == "mfgservshare"
replace var = "NRXGDP 1960-2010" if var == "nrxmean"
replace var = "DEINDU 1980-2010" if var == "negchg_19802010"

* Order of the sub-graphs: 2 for NRXGDP, 3 for DEINDU, 1 for everything else
gen ordvar = cond(var == "NRXGDP 1960-2010", 2, cond(var == "DEINDU 1980-2010", 3, 1))

* The values of var become the value labels of ordvar
labmask ordvar, values(var)

* Creating Figures * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

* Figure 1 * * * * *
* Panel 1 (sector outcomes). The data in memory are restored after the export.

preserve

keep if panelnum == 1

* invis places an unmarked point at the upper CI bound; its label invislab carries the
* hard-coded "diff" text shown to the right of the interval
gen invis = ci_upper
gen invislab = ""

* Label k of each list belongs to rownum k (first list: ordvar 2, second list: ordvar 3)
local f7aLabs2 `""diff -0.16***" "diff -0.23***" "diff 0.07" "diff 0.05" "diff -0.09" "diff 0.16" "diff 0.11" "diff -0.07**""'
local f7aLabs3 `""diff -0.02" "diff -0.19" "diff 0.17^" "diff 0.02" "diff 0.86^" "diff -1.15**" "diff 0.17" "diff 0.22**""'
foreach ord in 2 3 {
	forvalues pos = 1/8 {
		local f7aLab : word `pos' of `f7aLabs`ord''
		replace invislab = "`f7aLab'" if ordvar == `ord' & rownum == `pos'
	}
}

* Plots: horizontal CI caps, coefficient markers with stars below them, invisible markers
* carrying the diff labels. Options: row labels, red vertical line at 0, two horizontal
* separators, boxed panel subtitles, extra bottom and right margins, no y title,
* reversed y axis.
twoway ///
	(rcap ci_lower ci_upper rownum, horizontal lcolor(black) ///
		by(ordvar, row(1) xrescale legend(off) note("") colfirst)) ///
	(scatter rownum coef, by(ordvar) msymbol(D) mcolor(blue) msize(medium) ///
		mlabel(stars) mlabposition(6) mlabcolor(blue) mlabgap(*.5) mlabsize(medium)) ///
	(scatter rownum invis, by(ordvar) msymbol(none) ///
		mlabel(invislab) mlabposition(3) mlabcolor(black) mlabgap(*3) mlabsize(small)) ///
	, ///
	ylabel(1 "1.MFG+FIRE" 2 "2.MFG" 3 "3.FIRE" 4 "4.UNT1" 5 "5.UNT2" 6 "6.GOVT" 7 "7.NRX" 8 "8.CONST", angle(0) nogrid) ///
	xline(0, lpattern(longdash) lwidth(medium) lcolor(red)) ///
	yline(3.5, lpattern(shortdash) lcolor(gs5)) ///
	yline(5.5, lpattern(shortdash) lcolor(gs5)) ///
	subtitle(, bcolor(white) lcol(black)) ///
	plotregion(margin(b=+2)) ///
	plotregion(margin(r=+5)) ///
	ytitle("") ///
	yscale(reverse)
graph export "figures/tableD5part1clean.png", replace  width(3000) height(1908)
graph export "figures/figure7a.png", replace  width(3000) height(1908)

restore

* Figure 2 * * * * *
* Panel 2 (shpaid and shself). The data in memory are restored after the export.

preserve

keep if panelnum == 2

* invis places an unmarked point at the upper CI bound; its label invislab carries the
* hard-coded "diff" text shown to the right of the interval
gen invis = ci_upper
gen invislab = ""

* Label k of each list belongs to rownum k (first list: ordvar 2, second list: ordvar 3)
local f7bLabs2 `""diff -0.27" "diff 0.34*""'
local f7bLabs3 `""diff -0.71" "diff 1.09*""'
foreach ord in 2 3 {
	forvalues pos = 1/2 {
		local f7bLab : word `pos' of `f7bLabs`ord''
		replace invislab = "`f7bLab'" if ordvar == `ord' & rownum == `pos'
	}
}

* Same three plots as in the first figure; here there are no horizontal separators and
* the aspect ratio is fixed at 0.3.
twoway ///
	(rcap ci_lower ci_upper rownum, horizontal lcolor(black) ///
		by(ordvar, row(1) xrescale legend(off) note("") colfirst)) ///
	(scatter rownum coef, by(ordvar) msymbol(D) mcolor(blue) msize(medium) ///
		mlabel(stars) mlabposition(6) mlabcolor(blue) mlabgap(*.5) mlabsize(medium)) ///
	(scatter rownum invis, by(ordvar) msymbol(none) ///
		mlabel(invislab) mlabposition(3) mlabcolor(black) mlabgap(*3) mlabsize(small)) ///
	, ///
	ylabel(1 "9.WAGE" 2 "10.SELF", angle(0) nogrid) ///
	xline(0, lpattern(longdash) lwidth(medium) lcolor(red)) ///
	subtitle(, bcolor(white) lcol(black)) ///
	plotregion(margin(b=+2)) ///
	plotregion(margin(r=+5)) ///
	ytitle("") ///
	aspect(0.3) ///
	yscale(reverse)
graph export "figures/tableD5part2clean.png", replace width(3000) height(1908)
graph export "figures/figure7b.png", replace  width(3000) height(1908)

restore

******************
******************
**# TABLE D12 #1
**# TABLE D13 #2
**# FIGURE 9 #1
******************
******************

use LACdata, clear

* DISTRIBUTION OF HEIGHTS VARIABLE 2020 1960 FULL *
* Counts recorded by the authors in the original comments: 13 after the 2020 count, then
* 13 and 107 after the 1960 count
foreach yr in 2020 1960 {
	sum sumht_all if year == `yr', d
	count if sumht_all == 1 & year == `yr'
}
* Years whose last digit is 0
sum sumht_all if substr(string(year),-1,1) == "0", d
count if sumht_all == 1 & substr(string(year),-1,1) == "0"
* half of the sample

* HEIGHT BY TYPE
* 1960 and 2020 values of luht by building type, copied to every row of the country
foreach bt in all priv resid nores offic hotel retai gover {
	foreach yr in 1960 2020 {
		gen luht_`bt'_`yr' = luht_`bt' if year == `yr'
		bysort ccode: egen luht_`bt'`yr' = max(luht_`bt'_`yr')
		drop luht_`bt'_`yr'
	}
}
* HEIGHT BY THRESHOLD
* 1960 value of each luht variant (numeric or letter suffix), copied to every row of the
* country
foreach bt in all priv resid nores gover {
	foreach cut in 100 125 130 140 150 160 165 170 200 210 240 lc nolc l nol c noc {
		foreach yr in 1960 {
			gen luht`cut'_`bt'_`yr' = luht`cut'_`bt' if year == `yr'
			bysort ccode: egen luht`cut'_`bt'`yr' = max(luht`cut'_`bt'_`yr')
			drop luht`cut'_`bt'_`yr'
		}
	}
}
* PCGDP
foreach yr in 1960 2010 2020 {
	gen lpcgdp_`yr' = lpcgdp if year == `yr'
	bysort ccode: egen lpcgdp`yr' = max(lpcgdp_`yr')
	drop lpcgdp_`yr'
}
* SUM OF HEIGHTS
* Log of the summed heights by building type, and its 1960 and 2020 values by country
foreach bt in all priv resid nores offic hotel retai gover {
	sum sumht_`bt' if year == 2020, d
	gen lsumht_`bt' = log(sumht_`bt')
	foreach yr in 1960 2020 {
		gen lsumht_`bt'_`yr' = lsumht_`bt' if year == `yr'
		bysort ccode: egen lsumht_`bt'`yr' = max(lsumht_`bt'_`yr')
		drop lsumht_`bt'_`yr'
	}
}
* URBAN POP
gen lupop = log(upop)
foreach yr in 1960 2020 {
	gen lupop_`yr' = lupop if year == `yr'
	bysort ccode: egen lupop`yr' = max(lupop_`yr')
	drop lupop_`yr'
}
* POP
foreach yr in 1960 2010 2020 {
	gen pop_`yr' = pop if year == `yr'
	bysort ccode: egen pop`yr' = max(pop_`yr')
	drop pop_`yr'
}
gen pop201020 = pop2020 if year == 2020
** DEINDUSTRIALIZATION **
* Manufacturing share of selected years copied to every row of the country
foreach yr in 1980 2000 2010 2020 {
	gen indu_`yr' = mfgshare if year == `yr'
	bysort country_gjv: egen indu`yr' = max(indu_`yr')
	drop indu_`yr'
}
* chg_1980YYYY is the change in the manufacturing share since 1980. negchg_1980YYYY is
* that change with the sign flipped when it is negative, and 0 otherwise.
* NOTE(review): a missing change is not below 0 in Stata, so it also yields 0 - confirm
* that this is intended.
* The helper neg is dropped after 2020 and 2010 but stays in the data after 2000.
foreach yr in 2020 2010 2000 {
	gen chg_1980`yr' = indu`yr' - indu1980
	gen neg = (chg_1980`yr' < 0)
	gen negchg_1980`yr' = 0
	replace negchg_1980`yr' = chg_1980`yr' if neg == 1
	replace negchg_1980`yr' = -negchg_1980`yr'
	if `yr' != 2000 {
		drop neg
	}
}

sum luht_all luht_resid luht_nores [w=pop] if year == 2020

*** TABLE TALL BUILDING CONSTRUCTION AND THE VARIABLES OF INTEREST - TYPE OF BUILDINGS ***

*** TABLE 12A PANEL A ***
* For each building type, regresses luht of that type on nrxmean, mfgservshare and
* chg_19802020 plus the 1960 values and geography controls, using the 2020 rows weighted
* by pop, and appends the column to tableD12A.

capture erase "tables\tableD12A.xls"
capture erase "tables\tableD12A.tex"
capture erase "tables\tableD12A.txt"
* Differences reported under the addtext labels mfgnrx and mfgdeindu
local lincomset1 "mfgservshare - nrxmean"
local lincomset2 "mfgservshare - chg_19802020"
** NO CTRLS **
* Same specification as Table 1
* No need to control for urbanization 1960 since urbanization 2020 
* Need to control for pcgdp 2020 + mean 1960-2010 
* TYPE *
foreach X in ag {
foreach B in all resid nores offic hotel retai gover { 
foreach Y in 2020 {
* Temporary copies of the nrx variables under fixed names; dropped at the end of each pass
gen nrxmean = nrx`X'_mean`Y'
gen nrx1960 = nrx`X'_sh_gdp1960
xi: reg luht_`B' nrxmean mfgservshare chg_19802020 nrx1960 mfgserv_ca1960 luht_`B'1960 larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop] if year == `Y', robust
proglincom "`lincomset1'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset2'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
outreg2 * using "tables\tableD12A.xls", keep(nrxmean mfgservshare chg_19802020) addtext(mfgnrx, "`coeff1'", se1, "`se1'", mfgdeindu, "`coeff2'", se2, "`se2'", reg, type, build, `B') se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
* Displayed in the log only; note the sign is the reverse of lincomset1
lincom nrxmean - mfgservshare
drop nrxmean nrx1960
}
}
}

*** TABLE D12 PANEL B ***
* Same loop as Panel A with urbrate, lpcgdp2020 and meanlpcgdp19602020 added as controls.
* The three coefficients of interest are also stored with regsave (90 percent CI and
* p-values) in table5_B, one file per building type.

capture erase "tables\tableD12B.xls"
capture erase "tables\tableD12B.tex"
capture erase "tables\tableD12B.txt"
* Differences reported under the addtext labels mfgnrx and mfgdeindu
local lincomset1 "mfgservshare - nrxmean"
local lincomset2 "mfgservshare - chg_19802020"
* Same specification as Table 1
* No need to control for urbanization 1960 since urbanization 2020 
* Need to control for pcgdp 2020 + mean 1960-2010 
* TYPE *
foreach X in ag {
foreach B in all resid nores offic hotel retai gover { 
foreach Y in 2020 {
* Temporary copies of the nrx variables under fixed names; dropped at the end of each pass
gen nrxmean = nrx`X'_mean`Y'
gen nrx1960 = nrx`X'_sh_gdp1960
xi: reg luht_`B' urbrate lpcgdp2020 chg_19802020 meanlpcgdp19602020 nrxmean mfgservshare nrx1960 mfgserv_ca1960 luht_`B'1960 larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop] if year == `Y', robust
regsave chg_19802020 nrxmean mfgservshare using table5_`B', replace ci level(90) pval
proglincom "`lincomset1'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset2'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
outreg2 * using "tables\tableD12B.xls", keep(nrxmean mfgservshare chg_19802020) addtext(ctrls, yes, mfgnrx, "`coeff1'", se1, "`se1'", mfgdeindu, "`coeff2'", se2, "`se2'", reg, type, build, `B') se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
* Displayed in the log only
lincom mfgservshare - nrxmean
drop nrxmean nrx1960
}
}
}

* The regression data are parked in temp while the coefficient files are stacked.
save temp, replace 

* We combine the coefficients into one dta:
* each regsave file gets a depvar column holding its building type, then the first file
* is opened and the others are appended to it.
local t5Types "all resid nores offic hotel retai gover"
foreach bt of local t5Types {
	use table5_`bt', clear
	gen depvar = "`bt'"
	save table5_`bt'_2, replace 
}
gettoken t5First t5Rest : t5Types
use table5_`t5First'_2, clear
foreach bt of local t5Rest {
	append using table5_`bt'_2
}
save "intermediary\table5and6\table5coefs", replace

*** TABLE D13 PANEL A ***
* Non-residential buildings only. First the luht_nores regression with controls, then the
* same regression for the height-threshold outcomes luhtN_nores (N = 125 140 160 200 240),
* all appended to tableD13A. The block is then repeated with mfgfireshare in place of
* mfgservshare and written to tableD13A_robfire.

use temp, clear
* We create MFG+FIRE
gen mfgfireshare = mfgshare+fire2020_ma5_un if mfgshare != . & fire2020_ma5_un != . 

capture erase "tables\tableD13A.xls"
capture erase "tables\tableD13A.tex"
capture erase "tables\tableD13A.txt"
* NOTE(review): these differences are nrxmean minus mfgservshare and chg minus
* mfgservshare, i.e. the reverse sign of the mfgservshare-first differences used in the
* Table D12 panels, while the addtext labels (mfgnrx, mfgdeindu) are the same - confirm.
local lincomset1 "nrxmean-mfgservshare"
local lincomset2 "chg_19802020-mfgservshare"
* TYPE *
foreach X in ag {
foreach B in nores { 
foreach Y in 2020 {
gen nrxmean = nrx`X'_mean`Y'
gen nrx1960 = nrx`X'_sh_gdp1960
xi: reg luht_`B' urbrate lpcgdp2020 chg_19802020 meanlpcgdp19602020 nrxmean mfgservshare nrx1960 mfgserv_ca1960 luht_`B'1960 larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop] if year == `Y', robust
proglincom "`lincomset1'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset2'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
outreg2 * using "tables\tableD13A.xls", keep(nrxmean mfgservshare chg_19802020) addtext(mfgnrx, "`coeff1'", se1, "`se1'", mfgdeindu, "`coeff2'", se2, "`se2'",reg, type, build, `B') se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
* Displayed in the log only
lincom mfgservshare - nrxmean
drop nrxmean nrx1960
}
}
}
* Height thresholds. The 1960 control is luht_nores1960 (not the threshold-specific 1960
* value) for every N. The coefficients are stored with regsave in table5_noresN.
foreach X in ag { 
foreach B in nores { 
foreach N in 125 140 160 200 240 {
foreach Y in 2020 {
gen nrxmean = nrx`X'_mean`Y'
gen nrx1960 = nrx`X'_sh_gdp1960
xi: reg luht`N'_`B' urbrate lpcgdp2020 meanlpcgdp19602020 nrxmean mfgservshare chg_19802020 nrx1960 mfgserv_ca1960 luht_`B'1960 larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop] if year == `Y', robust
regsave chg_19802020 nrxmean mfgservshare using table5_nores`N', replace ci level(90) pval
proglincom "`lincomset1'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset2'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
outreg2 * using "tables\tableD13A.xls", keep(nrxmean mfgservshare chg_19802020) addtext(mfgnrx, "`coeff1'", se1, "`se1'", mfgdeindu, "`coeff2'", se2, "`se2'",reg, height, build, `B', height, `N') se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
* Displayed in the log only
lincom mfgservshare - nrxmean
drop nrxmean nrx1960
}
}
}
}
** ROBUSTNESS WITH MFGFIRE **
* mfgfireshare replaces mfgservshare as regressor; mfgserv_ca1960 stays among the controls.
* No regsave file is written in the robustness loops.
capture erase "tables\tableD13A_robfire.xls"
capture erase "tables\tableD13A_robfire.tex"
capture erase "tables\tableD13A_robfire.txt"
local lincomset1 "nrxmean-mfgfireshare"
local lincomset2 "chg_19802020-mfgfireshare"
* TYPE *
foreach X in ag {
foreach B in nores { 
foreach Y in 2020 {
gen nrxmean = nrx`X'_mean`Y'
gen nrx1960 = nrx`X'_sh_gdp1960
xi: reg luht_`B' urbrate lpcgdp2020 chg_19802020 meanlpcgdp19602020 nrxmean mfgfireshare nrx1960 mfgserv_ca1960 luht_`B'1960 larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop] if year == `Y', robust
proglincom "`lincomset1'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset2'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
outreg2 * using "tables\tableD13A_robfire.xls", keep(nrxmean mfgfireshare chg_19802020) addtext(mfgnrx, "`coeff1'", se1, "`se1'", mfgdeindu, "`coeff2'", se2, "`se2'",reg, type, build, `B') se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
* Displayed in the log only
lincom mfgfireshare - nrxmean
drop nrxmean nrx1960
}
}
}
foreach X in ag { 
foreach B in nores { 
foreach N in 125 140 160 200 240 {
foreach Y in 2020 {
gen nrxmean = nrx`X'_mean`Y'
gen nrx1960 = nrx`X'_sh_gdp1960
xi: reg luht`N'_`B' urbrate lpcgdp2020 meanlpcgdp19602020 nrxmean mfgfireshare chg_19802020 nrx1960 mfgserv_ca1960 luht_`B'1960 larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop] if year == `Y', robust
proglincom "`lincomset1'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset2'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
outreg2 * using "tables\tableD13A_robfire.xls", keep(nrxmean mfgfireshare chg_19802020) addtext(mfgnrx, "`coeff1'", se1, "`se1'", mfgdeindu, "`coeff2'", se2, "`se2'",reg, height, build, `B', height, `N') se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
* Displayed in the log only
lincom mfgfireshare - nrxmean
drop nrxmean nrx1960
}
}
}
}

* We combine the coefficients of the height-threshold regressions into one dta:
* each regsave file gets a depvar column (nores125 ... nores240), then the first file is
* opened and the others are appended to it.
local t5Cutoffs "nores125 nores140 nores160 nores200 nores240"
foreach cf of local t5Cutoffs {
	use table5_`cf', clear
	gen depvar = "`cf'"
	save table5_`cf'_2, replace 
}
gettoken t5cFirst t5cRest : t5Cutoffs
use table5_`t5cFirst'_2, clear
foreach cf of local t5cRest {
	append using table5_`cf'_2
}
save "intermediary\table5and6\table5coefs_cutoff", replace

***** TABLE D12 PANEL C *****
* Panel data set for the fixed-effects regressions: years whose last digit is 5 are
* removed and the sample is limited to 1960-2020.

use LACdata, clear
gen lastyr = substr(string(year),4,1)
tab lastyr
destring lastyr, replace 
drop if lastyr == 5
keep if inrange(year, 1960, 2020)
foreach yr in 1960 2020 {
	gen pop_`yr' = pop if year == `yr'
	bysort ccode: egen pop`yr' = max(pop_`yr')
	drop pop_`yr'
}
gen popt = pop
* Deindustrialization variables *
* 1980 manufacturing share copied to every row of the country
foreach yr in 1980 {
	gen indu_`yr' = mfgshare if year == `yr'
	bysort country_gjv: egen indu`yr' = max(indu_`yr')
	drop indu_`yr'
}
* Time-varying version: change relative to 1980, kept only when negative and for years
* from 1980 on, 0 otherwise.
* NOTE(review): unlike the cross-section negchg variables built elsewhere in this file,
* the sign is not flipped here - confirm that this is intended.
gen indu = mfgshare
gen chg_1980t = indu - indu1980
gen neg = (chg_1980t < 0)
gen negchg_1980t = 0 
replace negchg_1980t = chg_1980t if neg == 1 & year >= 1980
sum negchg_1980t, d
bysort year: sum negchg_1980t
sum mfgservshare negchg_1980t
corr mfgservshare negchg_1980t [w=pop2020]
* -0.46
* Lags 1 to 4 (in rows of the country's series ordered by year) of the three regressors
forvalues lag = 1/4 {
	foreach rv in nrxag_sh_gdp mfgservshare negchg_1980t {
		sort ccode year
		bysort ccode: gen lag`lag'`rv' = `rv'[_n-`lag']
	}
}
* Lags 1 to 4 of the height outcomes
forvalues lag = 1/4 {
	foreach bt in all resid nores {
		sort ccode year
		bysort ccode: gen lag`lag'luht_`bt' = luht_`bt'[_n-`lag']
	}
}

** BETTER WITH 4 LAGS (SO VARIABLE t + 3 LAGS) **

***** TYPE *****
* For each building type, a regression absorbing ccode with year dummies, weighted by 2020
* population and clustered by ccode. Each regressor group enters with four terms:
* lags 1-4 for nrx, and the current value plus lags 1-3 for mfgservshare and negchg_1980t.
* The sum of each group is computed with proglincom and reported through addtext.

capture erase "tables\tableD12C.xls"
capture erase "tables\tableD12C.tex"
capture erase "tables\tableD12C.txt"
* 4 LAGS - CONTROLS *
foreach B in all resid nores offic hotel retai gover { 
foreach C in ag {
foreach Z in 2020 {
local lincomset0 "lag1nrx`C'_sh_gdp + lag2nrx`C'_sh_gdp + lag3nrx`C'_sh_gdp + lag4nrx`C'_sh_gdp"
local lincomset1 "mfgservshare + lag1mfgservshare + lag2mfgservshare + lag3mfgservshare"
local lincomset2 "negchg_1980t + lag1negchg_1980t + lag2negchg_1980t + lag3negchg_1980t"
xi: areg luht_`B' lpcgdp urbrate lag1nrx`C'_sh_gdp lag2nrx`C'_sh_gdp lag3nrx`C'_sh_gdp lag4nrx`C'_sh_gdp mfgservshare lag1mfgservshare lag2mfgservshare lag3mfgservshare negchg_1980t lag1negchg_1980t lag2negchg_1980t lag3negchg_1980t lpop lpop_sq i.year [w=pop`Z'] if year <= 2020, robust absorb(ccode) clust(ccode)
* coeff1: sum of the nrx terms. coeff2: sum of the mfgservshare terms.
* coeff3: sum of the negchg_1980t terms.
proglincom "`lincomset0'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset1'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
proglincom "`lincomset2'"
local coeff3 = r(coefftxt)
local se3 = r(setxt)
outreg2 * using "tables\tableD12C.xls", keep(*nrx* *mfgservshare* *negchg_1980t*) addtext(ctrls, yes, nrxcoef, "`coeff1'", nrxse, "`se1'", mfgservcoef, "`coeff2'", mfgservse, "`se2'", deinducoef, "`coeff3'", deinduse, "`se3'") se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
* Displayed in the log only: sum of the mfgservshare terms minus sum of the nrx terms
lincom (mfgservshare + lag1mfgservshare + lag2mfgservshare + lag3mfgservshare - (lag1nrx`C'_sh_gdp + lag2nrx`C'_sh_gdp + lag3nrx`C'_sh_gdp + lag4nrx`C'_sh_gdp))
}
}
}

***** HEIGHT *****
* Same regression as in the TYPE block, for non-residential buildings only: first for
* luht_nores, then for the height-threshold outcomes luhtN_nores. Output: tableD13B.
* Brace structure: the loop over B is closed only by the last brace of this block, so the
* loop over N below runs INSIDE the B loop. That is why the B macro (nores) is still
* defined in the threshold regression.

capture erase "tables\tableD13B.xls"
capture erase "tables\tableD13B.tex"
capture erase "tables\tableD13B.txt"
* 4 LAGS
foreach B in nores { 
foreach C in ag {
foreach Z in 2020 {
local lincomset0 "lag1nrx`C'_sh_gdp + lag2nrx`C'_sh_gdp + lag3nrx`C'_sh_gdp + lag4nrx`C'_sh_gdp"
local lincomset1 "mfgservshare + lag1mfgservshare + lag2mfgservshare + lag3mfgservshare"
local lincomset2 "negchg_1980t + lag1negchg_1980t + lag2negchg_1980t + lag3negchg_1980t"
xi: areg luht_`B' lpcgdp urbrate lag1nrx`C'_sh_gdp lag2nrx`C'_sh_gdp lag3nrx`C'_sh_gdp lag4nrx`C'_sh_gdp mfgservshare lag1mfgservshare lag2mfgservshare lag3mfgservshare  negchg_1980t lag1negchg_1980t lag2negchg_1980t lag3negchg_1980t lpop lpop_sq i.year [w=pop`Z'] if year <= 2020, robust absorb(ccode) clust(ccode)
* coeff1: sum of the nrx terms. coeff2: sum of the mfgservshare terms.
* coeff3: sum of the negchg_1980t terms.
proglincom "`lincomset0'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset1'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
proglincom "`lincomset2'"
local coeff3 = r(coefftxt)
local se3 = r(setxt)
outreg2 * using "tables\tableD13B.xls", keep(*nrx* *mfgservshare* *negchg_1980t*) addtext(nrxcoef, "`coeff1'", nrxse, "`se1'", mfgservcoef, "`coeff2'", mfgservse, "`se2'", deinducoef, "`coeff3'", deinduse, "`se3'") se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
}
}
* The two braces above close the Z and C loops; the B loop is still open here.
* 4 LAGS - HEIGHT
foreach N in 125 140 160 200 240 {
foreach C in ag {
foreach Z in 2020 {
local lincomset0 "lag1nrx`C'_sh_gdp + lag2nrx`C'_sh_gdp + lag3nrx`C'_sh_gdp + lag4nrx`C'_sh_gdp"
local lincomset1 "mfgservshare + lag1mfgservshare + lag2mfgservshare + lag3mfgservshare"
local lincomset2 "negchg_1980t + lag1negchg_1980t + lag2negchg_1980t + lag3negchg_1980t"
xi: areg luht`N'_`B' lpcgdp urbrate lag1nrx`C'_sh_gdp lag2nrx`C'_sh_gdp lag3nrx`C'_sh_gdp lag4nrx`C'_sh_gdp mfgservshare lag1mfgservshare lag2mfgservshare lag3mfgservshare  negchg_1980t lag1negchg_1980t lag2negchg_1980t lag3negchg_1980t lpop lpop_sq i.year [w=pop`Z'] if year <= 2020, robust absorb(ccode) clust(ccode)
proglincom "`lincomset0'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset1'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
proglincom "`lincomset2'"
local coeff3 = r(coefftxt)
local se3 = r(setxt)
outreg2 * using "tables\tableD13B.xls", keep(*nrx* *mfgservshare* *negchg_1980t*) addtext(height, `N', nrxcoef, "`coeff1'", nrxse, "`se1'", mfgservcoef, "`coeff2'", mfgservse, "`se2'", deinducoef, "`coeff3'", deinduse, "`se3'") se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
}
}
}
}

***** FIGURE 9 *****

* Preparing data * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

* Coefficients by building type stacked with the coefficients by height cutoff
use "intermediary\table5and6\table5coefs.dta", clear
append using "intermediary\table5and6\table5coefs_cutoff.dta"

* Stars for each coefficient: one star is added for every threshold the p-value is below
gen stars = ""
foreach cut in .10 .05 .01 {
	replace stars = stars + "*" if pval < `cut'
}

* The "nores" coefficients are duplicated because we need them in two different panels;
* dupindicator is 0 for the original rows and 1 for the copies
expand 2 if depvar == "nores", gen(dupindicator)

* Position of each coefficient inside its panel (rownum) and the panel it belongs to
* (panelnum)
gen rownum   = 0
gen panelnum = 1

* Panel 1: building types (original rows only)
local f9Types "all resid nores offic hotel retai gover"
forvalues pos = 1/7 {
	local f9Type : word `pos' of `f9Types'
	replace rownum = `pos' if depvar == "`f9Type'" & dupindicator == 0
}

replace panelnum = 2 if rownum == 0

* Panel 2: the copy of nores in row 1, followed by the height cutoffs
replace rownum = 1 if depvar == "nores" & dupindicator == 1
local f9Cutoffs "125 140 160 200 240"
forvalues pos = 1/5 {
	local f9Cutoff : word `pos' of `f9Cutoffs'
	replace rownum = `pos' + 1 if depvar == "nores`f9Cutoff'"
}

replace panelnum = 3 if rownum == 0

* Panel 3: rows for further depvar values.
* NOTE(review): none of these values is written by the coefficient-combining steps of
* this section, so these lines may change nothing - confirm.
local f9Others "tipocp stel conc c lc nolc"
forvalues pos = 1/6 {
	local f9Other : word `pos' of `f9Others'
	replace rownum = `pos' if depvar == "`f9Other'"
}

* Display names of the three regressors
replace var = "MFGSERV 2020"     if var == "mfgservshare"
replace var = "NRXGDP 1960-2020" if var == "nrxmean"
replace var = "DEINDU 1980-2020" if var == "chg_19802020"

* Order of the sub-graphs: 2 for NRXGDP, 3 for DEINDU, 1 for everything else
gen ordvar = cond(var == "NRXGDP 1960-2020", 2, cond(var == "DEINDU 1980-2020", 3, 1))

* The values of var become the value labels of ordvar
labmask ordvar, values(var)

* Creating Figures * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

* Figure 1 * * * * *
* Panel 1 (rows 1-7: All, Resid, Non-Res, Office, Hotel, Retail, Govt),
* one sub-graph per value of ordvar.

preserve

keep if panelnum == 1

* Invisible marker placed at the upper CI bound; it only carries the "diff" text
gen invis = ci_upper
gen invislab = ""

* "diff" annotations are typed in by hand here, not computed in this section
* NOTE(review): presumably taken from the lincom output of the underlying
* regressions -- confirm the values if those regressions change.
replace invislab = "diff 0.03"		if ordvar == 2 & rownum == 1
replace invislab = "diff 0.05"		if ordvar == 2 & rownum == 2
replace invislab = "diff 0.05**" 	if ordvar == 2 & rownum == 3
replace invislab = "diff 0.04*" 	if ordvar == 2 & rownum == 4
replace invislab = "diff 0.02" 		if ordvar == 2 & rownum == 5
replace invislab = "diff 0.06" 		if ordvar == 2 & rownum == 6
replace invislab = "diff 0.03" 		if ordvar == 2 & rownum == 7

replace invislab = "diff -0.01"		if ordvar == 3 & rownum == 1
replace invislab = "diff -0.01"		if ordvar == 3 & rownum == 2
replace invislab = "diff -0.04" 	if ordvar == 3 & rownum == 3
replace invislab = "diff -0.09***" 	if ordvar == 3 & rownum == 4
replace invislab = "diff 0.07*" 	if ordvar == 3 & rownum == 5
replace invislab = "diff -0.10^" 	if ordvar == 3 & rownum == 6
replace invislab = "diff -0.00"		if ordvar == 3 & rownum == 7

twoway ///
	(rcap ci_lower ci_upper rownum, lcolor(black) horizontal 					/// Confidence intervals
			by(ordvar, row(1) xrescale legend(off) note("") colfirst)) 			/// By-command styling
	(scatter rownum coef, by(ordvar) mcolor(blue) msymbol(D) msize(medium)		/// Point estimates
			mlabel(stars) mlabcolor(blue) mlabposition(6)						/// Significance stars below each point
						  mlabgap(*.5) mlabsize(medium)) 						/// .
	(scatter rownum invis, by(ordvar) msymbol(none) 							/// Invisible markers at the upper CI bound
			mlabel(invislab) mlabcolor(black) mlabposition(3)					/// "diff" text to the right of the CI
						  mlabgap(*4) mlabsize(small)) 							/// .
	, ///
	ylabel(1  "1.All" 															/// Labels for variables (Y axis)
		   2  "2.Resid"  															/// .
		   3  "3.Non-Res" 														/// .
		   4  "4.Office" 															/// .
		   5  "5.Hotel"															/// .
		   6  "6.Retail"															/// .
		   7  "7.Govt"															/// .
		   , angle(0) nogrid) 													/// .
	xline(0, lpattern(longdash) lwidth(medium) lcolor(red)) 					///	Vertical line at 0 
	subtitle(,bcolor(white) lcol(black)) 										///	Define style of panel subtitles
	plotregion(margin(b=+2)) 													/// Adjust margins of plot
	plotregion(margin(r=+5)) 													/// Adjust margins of plot
	ytitle("") 																	/// Eliminate Y axis title
	aspect(0.9)																	/// Aspect ratio of plot
	yscale(reverse) 															//	Flip Y axis
graph export "Figures/tableconst_part1clean.png", replace width(3000) height(1908)

* Fix: this panel used to be written to "Figures/figure9b.png", the same file the
* panel-2 graph exports to, so it was overwritten and never saved as a Figure 9 panel.
graph export "Figures/figure9a.png", replace width(3000) height(1908)


restore

* Figure 2 * * * * *
* Panel 2 (rows 1-6: Non-Res.All and the 125m-240m height cutoffs), one sub-graph
* per value of ordvar. The data in memory are restored afterwards.

preserve

keep if panelnum == 2

* Invisible marker placed at the upper CI bound; it only carries the "diff" text
gen invis = ci_upper
gen invislab = ""

* "diff" annotations are typed in by hand here, not computed in this section
* NOTE(review): confirm these values if the underlying regressions change.
replace invislab = "diff 0.05**"	if ordvar == 2 & rownum == 1
replace invislab = "diff 0.05"		if ordvar == 2 & rownum == 2
replace invislab = "diff 0.11***" 	if ordvar == 2 & rownum == 3
replace invislab = "diff 0.03" 		if ordvar == 2 & rownum == 4
replace invislab = "diff 0.11^" 	if ordvar == 2 & rownum == 5
replace invislab = "diff 0.12*" 	if ordvar == 2 & rownum == 6

replace invislab = "diff -0.04"		if ordvar == 3 & rownum == 1
replace invislab = "diff -0.05"		if ordvar == 3 & rownum == 2
replace invislab = "diff -0.04" 	if ordvar == 3 & rownum == 3
replace invislab = "diff -0.10^" 	if ordvar == 3 & rownum == 4
replace invislab = "diff -0.01" 	if ordvar == 3 & rownum == 5
replace invislab = "diff -0.00" 	if ordvar == 3 & rownum == 6

twoway ///
	(rcap ci_lower ci_upper rownum, lcolor(black) horizontal 					/// Confidence intervals
			by(ordvar, row(1) xrescale legend(off) note("") colfirst)) 			/// By-command styling
	(scatter rownum coef, by(ordvar) mcolor(blue) msymbol(D) msize(medium)		/// Point estimates
			mlabel(stars) mlabcolor(blue) mlabposition(6)						/// Significance stars below each point
						  mlabgap(*.5) mlabsize(medium)) 						/// .
	(scatter rownum invis, by(ordvar) msymbol(none) 							/// Invisible markers at the upper CI bound
			mlabel(invislab) mlabcolor(black) mlabposition(3)					/// "diff" text to the right of the CI
						  mlabgap(*4) mlabsize(small)) 							/// .
	, ///																				
	ylabel(1  "8.Non-Res.All" 													/// Labels for variables (Y axis)
		   2  "9.Non-Res.125m"															/// .
		   3  "10.Non-Res.140m"															/// .
		   4  "11.Non-Res.160m"															/// .
		   5  "12.Non-Res.200m"															/// .
		   6  "13.Non-Res.240m"															/// .
		   , angle(0) nogrid) 													/// .
	xline(0, lpattern(longdash) lwidth(medium) lcolor(red)) 					///	Vertical line at 0
	subtitle(,bcolor(white) lcol(black)) 										///	Define style of panel subtitles
	plotregion(margin(b=+2)) 													/// Adjust margins of plot
	plotregion(margin(r=+5)) 													/// Adjust margins of plot
	ytitle("") 																	/// Eliminate Y axis title
	aspect(0.9)																	/// Aspect ratio of plot
	yscale(reverse) 															//	Flip Y axis
* The same graph is written twice, under two file names
graph export "Figures/tableconst_part2clean.png", replace width(3000) height(1908)
graph export "Figures/figure9b.png", replace width(3000) height(1908)

restore

******************
******************
**# TABLE D14 #2
******************
******************

*******************************
*** CEMENT: EXPORTS/IMPORTS ***
*******************************

* Source: UN COMTRADE
* https://comtrade.un.org/data/
local waves 1970 1975 1980 1985 1990 1995 2000 2005 2010 2015

* Step 1: turn every raw CSV extract into a .dta file, keeping only the rows
* that carry a usable quantity
foreach wave of local waves {
	import delimited "comtrade`wave'.csv", clear
	keep year tradeflow reporter reporteriso qtyunit qty
	tab qtyunit
	drop if inlist(qtyunit, "No Quantity", "Number of items") | qty == .
	sort reporteriso
	save comtrade`wave', replace
}

* Step 2: stack all extracts, starting from the first one
gettoken firstwave laterwaves : waves
use comtrade`firstwave', clear
foreach wave of local laterwaves {
	append using comtrade`wave'
}
sort reporteriso year
save cement_trade, replace
tab qtyunit, m

* Step 3: manual correction, then save again (the data in memory are exactly
* what was just saved, so no reload is needed)
sort reporteriso tradeflow year
* Small issue for Malaysia that we correct
replace qty = (1587012420+562147866)/2 if reporter == "Malaysia" & year == 2008
sort reporteriso year
save cement_trade, replace

** Evolution - sum of exports and sum of imports **
* Builds yearly world totals of cement production, exports and imports and
* compares trade with production (shares in %).

* Total production *
use cement_temp, clear
tab year
keep if year >= 1970
collapse (sum) cement, by(year)
twoway (connected cement year)
ren cement cementprod
* Multiply by 1000 so that production is on the same scale as the trade
* quantities below (which are divided by 1000)
replace cementprod = cementprod*1000
* 4 billion today (tons)
sort year
save cementprod, replace

* Exports 
use cement_trade, clear
tab tradeflow 
keep if tradeflow == "Export"
* Right now, kgs
* We convert to tons 
replace qty = qty/1000
collapse (sum) qty, by(year)
ren qty export
sort year
save cementexport, replace

* Imports
use cement_trade, clear
tab tradeflow 
keep if tradeflow == "Import"
* Right now, kgs
* We convert to tons 
replace qty = qty/1000
collapse (sum) qty, by(year)
ren qty import
sort year
save cementimport, replace

* Combine 
* Fix: the production variable was renamed to cementprod above, but the commands
* below still referred to "cement" and only ran through Stata's variable
* abbreviation. The full name is now used so they do not depend on that.
use cementprod, clear
sort year
merge year using cementexport
tab _m
drop _m
sort year
merge year using cementimport
tab _m
drop _m
twoway (connected cementprod year) (connected export year) (connected import year), legend(order(1 "Production" 2 "Export" 3 "Import") row(1))
corr export import
* 0.96
gen shexport = export/cementprod*100
gen shimport = import/cementprod*100
sum shexport, d
* 2-7
sum shimport, d
* 2-6
twoway (connected shexport year) 
twoway (connected shexport year) (connected shimport year) 
sum shexport shimport if year == 2019
* 4.3-3.2
gen meansh = (shexport + shimport)/2
* We use the mean of the two, so 
sum meansh if year == 2019
* 3.7% today
sum meansh
* 4.9% 1970-2019
sum meansh [w=cementprod]
* 4.8% 1970-2019

* Cement data *
* Country-year exports and imports, then restricted to the countries in LACdata
use cement_trade, clear
tab reporter
tab reporteriso if inlist(reporter, "Yemen", "Fmr Arab Rep. of Yemen", "Fmr Dem. Yemen")
* Fold the former Yemeni reporters into a single "Yemen" / "YEM" reporter
replace reporteriso = "YEM" if reporteriso == "YMD"
replace reporter = "Yemen" if inlist(reporter, "Fmr Arab Rep. of Yemen", "Fmr Dem. Yemen")
tab qtyunit
* Right now, kgs
* We convert to tons 
replace qty = qty/1000
* One quantity column per trade flow (missing for rows of the other flow)
foreach flow in Export Import {
	local suffix = lower("`flow'")
	gen cement`suffix' = qty if tradeflow == "`flow'"
}
collapse (sum) cementexport cementimport, by(reporter reporteriso year)
sort reporteriso year
save cement_trade2, replace
* No Eq Guinea and no Taiwan

* Combine with the main data
* One row per country (year 2020) is used as the list of countries to keep
use LACdata, clear
keep if year == 2020
keep ccode country_wb 
gen reporteriso = ccode
sort reporteriso 
merge reporteriso using cement_trade2
tab _m
tab ccode if _m == 1
* OK, no
tab reporter if _m == 2
* Reporters that are not in the main data are discarded
drop if _m == 2
drop _m
tab year
* Assume that if missing = 0.
drop ccode reporter*
sort country_wb year
save cement_trade3, replace

* We obtain the sum of imports and exports *
* Country totals over all available years, saved as cement_trade4
use cement_trade3, clear
collapse (sum) cementexport cementimport, by(country_wb)
ren cementexport cementexport7020
ren cementimport cementimport7020
sort country_wb
save cement_trade4, replace

* Combining * 
* Yearly totals of exports and imports over the retained countries, saved as temp
use cement_trade3, clear
collapse (sum) cementexport cementimport, by(year)
sort year
save temp, replace

* Re-save cement_temp sorted by country_wb (the old-style merge below needs
* the using file sorted by the merge key)
use cement_temp, clear
sort country_wb
save cement_temp, replace

* Yearly cement production restricted to the countries present in LACdata
use LACdata, clear
keep if year == 2020
keep country_wb
count
* 116
sort country_wb
merge country_wb using cement_temp
tab _m
* Drop producers that are not in the main data
drop if _m == 2
drop _m
collapse (sum) cement, by(year)
keep if year >= 1970
sort year
merge year using temp
tab _m
drop _m
drop if year == .
ren cementexport export
ren cementimport import
sum export cement
* Trade quantities are divided by 1000 before being compared with production
* NOTE(review): presumably because cement is in thousand tons while trade is in
* tons at this point -- confirm the units of cement_temp.
gen shexport = (export/1000)/cement*100
gen shimport = (import/1000)/cement*100
sum shexport, d
* 2-7
sum shimport, d
* 2-6
twoway (connected shexport year) (connected shimport year) 
sum shexport shimport if year == 2019
* 2.8-1.9
gen meansh = (shexport + shimport)/2
* We use the mean of the two, so 
sum meansh if year == 2019
* 2.4% today
sum meansh
* 4.5% 1970-2019
sum meansh [w=cement]
* 3.7% 1970-2019

****************************************
*** CEMENT: CROSS-SECTIONAL ANALYSIS ***
***************************************

* USGS 85000 for U.S. (Thousand metric tons)	  										
* HYDRAULIC CEMENT: WORLD PRODUCTION, BY COUNTRY OR LOCALITY1, 2																				
*(Thousand metric tons)											

use LACdata, clear

** From 1960 * 
gen lcement19702017 = log(cement19702017)
gen lcement19602017 = log(cement19602017)
gen lupop = log(upop)
* Log urban population in selected years, copied to every row of the country
foreach X in 1960 1970 2020 {
	bysort ccode: egen lupop`X' = max(cond(year == `X', lupop, .))
}
codebook ccode if lcement19602017 != .
codebook ccode if cement19702017 != .
* LPCGDP and POP: same year-snapshot construction
foreach stub in lpcgdp pop {
	foreach X in 1960 2010 2020 {
		bysort ccode: egen `stub'`X' = max(cond(year == `X', `stub', .))
	}
}
gen pop201020 = pop2020 if year == 2020
* CEMENT 
foreach X in 1960 1970 {
	bysort ccode: egen lcement10_`X' = max(cond(year == `X', lcement10, .))
}
* CEMENT EXPORTS AND IMPORTS 
sort country_wb
merge country_wb using cement_trade4
tab _m
drop _m
* Test
* Rescale the trade totals by 1000 and treat countries without trade data as zero
foreach flow in export import {
	replace cement`flow'7020 = cement`flow'7020/1000
	replace cement`flow'7020 = 0 if cement`flow'7020 == .
}
codebook cement19702017 cementexport7020 cementimport7020
* Apparent consumption = production - exports + imports
gen cemcons7020 = cement19702017-cementexport7020+cementimport7020
sum cemcons7020, d
tab ccode if cemcons7020 < 0 & year == 2020
* Haiti
* We replace by the minimal positive value
sum cemcons7020 if cemcons7020 > 0, d
replace cemcons7020 = 8195.982 if ccode == "HTI"
gen lcemcons7020 = log(cemcons7020)
corr lcement19702017 lcemcons7020 if year == 2020
* 0.98

** DEINDUSTRIALIZATION **
* Manufacturing share in benchmark years, copied to every row of the country
foreach X in 1980 2000 2010 2020 {
	bysort country_gjv: egen indu`X' = max(cond(year == `X', mfgshare, .))
}
* For each end year: change since 1980, and the size of the decline
* (negchg_* is positive when the share fell and 0 otherwise, including when the
* change is missing).
* The helper variable neg is dropped between rounds but kept after the last one.
foreach endyr in 2020 2010 2000 {
	gen chg_1980`endyr' = indu`endyr' - indu1980
	gen neg = (chg_1980`endyr' < 0)
	gen negchg_1980`endyr' = -cond(neg == 1, chg_1980`endyr', 0)
	if `endyr' != 2000 {
		drop neg
	}
}

capture erase "tables\tableD14cols_1_4.xls"
capture erase "tables\tableD14cols_1_4.tex"
capture erase "tables\tableD14cols_1_4.txt"

* Table D14, columns 1-4, in this order:
*   (1) cement production,  without urbrate / income controls
*   (2) cement production,  with    urbrate / income controls
*   (3) cement consumption, without urbrate / income controls
*   (4) cement consumption, with    urbrate / income controls
* Each regression is appended to the same outreg2 file and followed by three
* pairwise lincom comparisons of the main coefficients.
*
* Changes relative to the previous version of this section:
*  - addtext(wgts, ...) referred to the macro `Z', which is not defined in this
*    section; the weights actually used are [w=pop], so the label is now "pop".
*  - keep() listed lpcgdpmean19702020, which is not a regressor; the regressor
*    is meanlpcgdp19602020, so that control was never reported.
*  - nrxmean / nrx1970 are now dropped only after the lincom calls that
*    reference nrxmean (same order as in the columns 5-6 section).
foreach X in ag {
foreach Y in 2020 {
	gen nrxmean = nrx`X'_mean`Y'
	gen nrx1970 = nrx`X'_sh_gdp1970
	* lcement19702017 = cement production; lcemcons7020 = cement consumption
	foreach depvar in lcement19702017 lcemcons7020 {
		foreach withctrls in 0 1 {
			* Optional extra controls (empty in the "no controls" columns)
			local ctrls ""
			if `withctrls' {
				local ctrls "urbrate lpcgdp2020 meanlpcgdp19602020"
			}
			xi: reg `depvar' `ctrls' nrxmean mfgservshare negchg_19802020 lupop2020 lupop1970 nrx1970 mfgserv_ca1970 larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop] if year == `Y', robust
			outreg2 * using "tables\tableD14cols_1_4.xls", keep(nrxmean mfgservshare `ctrls' negchg_19802020) addtext(type, `X', wgts, pop, year, 2020, ctrls, yes) se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
			lincom mfgservshare - nrxmean
			lincom mfgservshare - negchg_19802020
			lincom nrxmean - negchg_19802020
		}
	}
	drop nrxmean nrx1970
}
}

************************
*** CONSTRUCTION GDP ***
************************

*** LEVEL; CONTROLLING FOR URBAN POP ***

use LACdata, clear

* GDP level
codebook pcgdp
sum pcgdp if year == 2020, d
sum pop if year == 2020, d
* In 000s
sum constr_sh_gdp
* Total GDP = per-capita GDP x population (pop is in thousands)
gen gdp = pcgdp*(pop*1000)
* Construction GDP = total GDP x construction share (share is in %)
gen gdp_constr = gdp/100*constr_sh_gdp
gen lgdp_constr = log(gdp_constr)
sum gdp_constr
* Log construction GDP in selected years, copied to every row of the country
foreach X in 1960 1970 2020 {
	bysort ccode: egen lgdpconstr`X' = max(cond(year == `X', lgdp_constr, .))
}
* URBAN POP
gen lupop = log(upop)
foreach X in 1960 1970 2020 {
	bysort ccode: egen lupop`X' = max(cond(year == `X', lupop, .))
}
** DEINDUSTRIALIZATION **
* Manufacturing share in benchmark years, copied to every row of the country
foreach X in 1980 2000 2010 2020 {
	bysort country_gjv: egen indu`X' = max(cond(year == `X', mfgshare, .))
}
* For each end year: change since 1980, and the size of the decline
* (negchg_* is positive when the share fell and 0 otherwise, including when the
* change is missing).
* The helper variable neg is dropped between rounds but kept after the last one.
foreach endyr in 2020 2010 2000 {
	gen chg_1980`endyr' = indu`endyr' - indu1980
	gen neg = (chg_1980`endyr' < 0)
	gen negchg_1980`endyr' = -cond(neg == 1, chg_1980`endyr', 0)
	if `endyr' != 2000 {
		drop neg
	}
}

** DRAFT - TOTAL SUM of construction GDP 1970-2020 **

sum gdp_constr, d
*pcGDP PPP (cst 2005 intl $) WDI as base 6010, Maddison before, WDI 2017$ 1020)
* Construction GDP in 1970 only, as a country-level total and its log
gen gdp_constr_70 = gdp_constr if year == 1970
bysort ccode: egen gdp_constr70 = sum(gdp_constr_70)
sum gdp_constr70
gen lgdp_constr70 = log(gdp_constr70)
* Construction GDP summed over 1970-2020, as a country-level total and its log
gen gdp_constr_7010s = gdp_constr if inrange(year, 1970, 2020)
bysort ccode: egen gdp_constr7010s = sum(gdp_constr_7010s)
sum gdp_constr7010s
gen lgdp_constr7010s = log(gdp_constr7010s)

capture erase "tables\tableD14cols_5_6.xls"
capture erase "tables\tableD14cols_5_6.tex"
capture erase "tables\tableD14cols_5_6.txt"

* Table D14, columns 5-6: log of summed construction GDP (1970-2020),
* first without and then with the urbrate / income controls.
*
* Changes relative to the previous version of this section:
*  - the weight was written [w=pop`Z'] and the addtext label as `Z', but `Z' is
*    not defined in this section, so it expanded to nothing. The weight that
*    was effectively used, pop, is now spelled out in both places.
*  - keep() listed lpcgdpmean19702020, which is not a regressor; the regressor
*    is meanlpcgdp19702020, so that control was never reported.
foreach V in lgdp_constr {
foreach X in ag {
foreach Y in 2020 {
	gen nrxmean = nrx`X'_mean1970`Y'
	gen nrx1970 = nrx`X'_sh_gdp1970
	foreach withctrls in 0 1 {
		* Optional extra controls (empty in the "no controls" column)
		local ctrls ""
		if `withctrls' {
			local ctrls "urbrate lpcgdp meanlpcgdp19702020"
		}
		xi: reg `V'7010s nrxmean mfgservshare negchg_19802000 lupop2020 lupop1970 `ctrls' nrx1970 mfgserv_ca1970 larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop] if year == `Y', robust
		outreg2 nrxmean mfgservshare nrx1970 mfgserv_ca1970 using "tables\tableD14cols_5_6.xls", keep(negchg_19802000 nrxmean mfgservshare `ctrls') addtext(type, `X', wgts, pop, year, 2020, ctrls, yes) se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
		* Pairwise comparisons of the three main coefficients
		lincom mfgservshare - nrxmean
		lincom mfgservshare - negchg_19802000
		lincom nrxmean-negchg_19802000
	}
	drop nrxmean nrx1970
}
}
}

******************
*** SLUM SHARE ***
******************	

* Slums 1990 2000 2015
use LACdata, clear
* LPCGDP and POP in selected years, copied to every row of the country
foreach stub in lpcgdp pop {
	foreach X in 1960 2010 2020 {
		bysort ccode: egen `stub'`X' = max(cond(year == `X', `stub', .))
	}
}
gen pop201020 = pop2020 if year == 2020
codebook slumsh* if year == 2020

** DEINDUSTRIALIZATION **
* Manufacturing share in benchmark years, copied to every row of the country
foreach X in 1980 1990 2000 2010 2020 {
	bysort country_gjv: egen indu`X' = max(cond(year == `X', mfgshare, .))
}
* Each span is written as <start year><end year>. For every span we build the
* change in the manufacturing share and the size of the decline
* (negchg_* is positive when the share fell and 0 otherwise, including when the
* change is missing).
* The helper variable neg is dropped between rounds but kept after the last one.
local spans 19802020 19802010 19802000 19902020 19902010
local nspans : word count `spans'
local k = 0
foreach span of local spans {
	local ++k
	local startyr = substr("`span'", 1, 4)
	local endyr = substr("`span'", 5, 4)
	gen chg_`span' = indu`endyr' - indu`startyr'
	gen neg = (chg_`span' < 0)
	gen negchg_`span' = -cond(neg == 1, chg_`span', 0)
	if `k' < `nspans' {
		drop neg
	}
}

*** New regressions ***
* BETTER WITH 2010 THAN 2020
* Table D14, columns 7-8: slum share (slumsh2015) regressions, first without and
* then with the urbrate / income controls. proglincom (defined at the top of
* the file) runs lincom and returns the estimate with stars in r(coefftxt) and
* the bracketed standard error in r(setxt); both are passed to outreg2 as text.

capture erase "tables\tableD14cols_7_8.xls"
capture erase "tables\tableD14cols_7_8.tex"
capture erase "tables\tableD14cols_7_8.txt"
* First comparison: MFGSERV coefficient minus NRX coefficient
local lincomset0 "mfgservshare-nrxmean"
** 2015 **
*foreach Y in 2020 2010 {
* `Y' selects the regression year and the end year of the NRX mean / deindustrialization span
foreach Y in 2010 {
* No controls *
foreach X in ag {
gen nrxmean = nrx`X'_mean`Y'
gen nrx1960 = nrx`X'_sh_gdp1960
* Second comparison: MFGSERV coefficient minus deindustrialization coefficient
local lincomset1 "mfgservshare-negchg_1980`Y'"
xi: reg slumsh2015 nrxmean mfgservshare negchg_1980`Y' nrx1960 mfgserv_ca1960 larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop] if year == `Y', robust
proglincom "`lincomset0'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset1'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
* NOTE(review): the rows labelled mfgservcoef/mfgservse carry the
* MFGSERV-minus-deindustrialization comparison (lincomset1).
outreg2 * using "tables\tableD14cols_7_8.xls", keep(nrxmean mfgservshare negchg_1980*) addtext(nrxcoef, "`coeff1'", nrxse, "`se1'", mfgservcoef, "`coeff2'", mfgservse, "`se2'") se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
* Third comparison is only displayed in the log, not exported
lincom nrxmean-negchg_1980`Y'
drop nrxmean nrx1960
}

* Both controls *
foreach X in ag {
gen nrxmean = nrx`X'_mean`Y'
gen nrx1960 = nrx`X'_sh_gdp1960
local lincomset1 "mfgservshare-negchg_1980`Y'"
xi: reg slumsh2015 urbrate lpcgdp2020 meanlpcgdp19602020 nrxmean mfgservshare negchg_1980`Y' nrx1960 mfgserv_ca1960 larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop] if year == `Y', robust
proglincom "`lincomset0'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset1'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
* The added controls (urbrate, lpcgdp2020, meanlpcgdp19602020) are not in keep(),
* so they are not reported in the table
outreg2 * using "tables\tableD14cols_7_8.xls", keep(nrxmean mfgservshare negchg_1980*) addtext(nrxcoef, "`coeff1'", nrxse, "`se1'", mfgservcoef, "`coeff2'", mfgservse, "`se2'") se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
lincom nrxmean-negchg_1980`Y'
drop nrxmean nrx1960
}
}

*****************
*****************
**# TABLE D10 #3
*****************
*****************

** MAIN VARIABLES COUNTRY LEVEL (N = 116) **
* Builds one row per country (year 2015) with the country-level regressors and a
* harmonized country name (cntry_name) used as merge key with the city-level
* data; saved as temp_ctrls.
use LACdata, clear
** DEINDUSTRIALIZATION **
* Manufacturing share in 1980 and 2015, copied to every row of the country
foreach X in 1980 2015 {
gen indu_`X' = mfgshare if year == `X'
bysort country_gjv: egen indu`X' = max(indu_`X')
drop indu_`X'
}
* negchg_19802015 = size of the decline in the manufacturing share (0 if no decline)
gen chg_19802015 = indu2015 - indu1980
gen neg = (chg_19802015 < 0)
gen negchg_19802015 = 0 
replace negchg_19802015 = chg_19802015 if neg == 1
replace negchg_19802015 = -negchg_19802015
sum negchg_19802015, d
drop nrxag_sh_gdp1*
* nrxag_sh_gdp`X' is filled only on the rows of year `X'; nrx`X' spreads that
* value to all rows of the country
foreach X in 1960 1975 {
gen nrxag_sh_gdp`X' = nrxag_sh_gdp if year == `X'
bysort ccode: egen nrx`X' = max(nrxag_sh_gdp`X')
}
* Country means over 1975-2015
keep if year >= 1975 & year <= 2015
bysort ccode: egen nrxmean = mean(nrxag_sh_gdp)
* TYPE OF NRX *
bysort ccode: egen agrimean = mean(agri_sh_gdp)
bysort ccode: egen minfuelmean = mean(minfuel_sh_gdp)
keep if year == 2015 
* NOTE(review): this keeps nrxag_sh_gdp1* (filled only in 1960/1975 rows, hence
* missing on the 2015 rows kept here) and not nrx1960/nrx1975 -- confirm
* whether the spread versions were intended.
keep country_wb nrxmean agrimean minfuelmean mfgservshare nrxag_sh_gdp1* pop larea larea_sq lpop lpop_sq smallisland type threshold lthreshold_level negchg_19802015
* Harmonize country names: strip spaces and hyphens, then map the remaining
* names one by one to the spelling used in the merge key
gen cntry_name = country_wb
replace cntry_name = subinstr(cntry_name, " ", "",.) 
replace cntry_name = subinstr(cntry_name, "-", "",.) 
replace cntry_name = "Bahamas" if cntry_name == "Bahamas,The"
replace cntry_name = "Brunei" if cntry_name == "BruneiDarussalam"
replace cntry_name = "CapeVerde" if cntry_name == "CaboVerde"
replace cntry_name = "CotedIvoire" if cntry_name == "Coted'Ivoire"
replace cntry_name = "DemocraticRepublicoftheCongo" if cntry_name == "Congo,Dem.Rep."
replace cntry_name = "Egypt" if cntry_name == "Egypt,ArabRep."
*replace cntry_name = "FrenchGuiana	" if cntry_name == ""
replace cntry_name = "Gambia" if cntry_name == "Gambia,The"
*replace cntry_name = "Guadeloupe	" if cntry_name == ""
replace cntry_name = "HongKong" if cntry_name == "HongKongSAR,China"
replace cntry_name = "Iran" if cntry_name == "Iran,IslamicRep."
replace cntry_name = "Kyrgyzstan" if cntry_name == "KyrgyzRepublic"
replace cntry_name = "Laos" if cntry_name == "LaoPDR"
replace cntry_name = "Macao" if cntry_name == "MacaoSAR,China"
replace cntry_name = "Macedonia" if cntry_name == "NorthMacedonia"
*replace cntry_name = "Martinique	" if cntry_name == ""
*replace cntry_name = "Mayotte	" if cntry_name == ""
replace cntry_name = "NorthKorea" if cntry_name == "Korea,Dem.People'sRep."
replace cntry_name = "Palestina" if cntry_name == "WestBankandGaza"
replace cntry_name = "RepublicofCongo" if cntry_name == "Congo,Rep."
*replace cntry_name = "Reunion	" if cntry_name == ""
replace cntry_name = "Russia" if cntry_name == "RussianFederation"
replace cntry_name = "Slovakia" if cntry_name == "SlovakRepublic"
replace cntry_name = "SouthKorea" if cntry_name == "Korea,Rep."
replace cntry_name = "Swaziland" if cntry_name == "Eswatini"
replace cntry_name = "Syria" if cntry_name == "SyrianArabRepublic"
*replace cntry_name = "Taiwan	" if cntry_name == ""
replace cntry_name = "Venezuela" if cntry_name == "Venezuela,RB"
*replace cntry_name = "WesternSahara	" if cntry_name == ""
replace cntry_name = "Yemen" if cntry_name == "Yemen,Rep."
* Same country as above, spelled with the accented character
replace cntry_name = "CotedIvoire" if cntry_name == "Côted'Ivoire"
sort cntry_name
save temp_ctrls, replace
count
* 116

** CITY-LEVEL DATA **
* How many FUAs in the world in 2015?
* Convert the raw FUA list to .dta, sorted by the FUA identifier
import delimited "fuas.csv", clear
sort efua_id
save fuas, replace
count
codebook efua_id
* 9031
* 6830/9031 = 75.6

* Share of FUA's world pop. that we have. 
* Keep only the identifier and the 2015 population; the population is stored
* as text with thousands separators, so commas are removed before destring
use fuas, clear
keep efua_id fua_p_2015
replace fua_p_2015 = subinstr(fua_p_2015, ",", "",.) 
destring fua_p_2015, replace
count
* 9031
sort efua_id
save temp, replace

** We merge **

** Capital **

* Capital-city indicator by FUA, sorted by the merge key
use "capitals_FUAs.dta", clear
*tab capital
*collapse (max) capital, by(cntry_name)
*tab capital
*tab cntry_name if capital == 0
keep efua_id  capital
sort efua_id 
save capitals, replace

* The three files below are created in the folder "IPUMS and other files"
* Assemble the city-level dataset: three population files, the capital
* indicator and the 2015 FUA population, all merged on efua_id
use "fua_uc_pop_max.dta", clear
sort efua_id
merge efua_id using capitals
tab _m
drop _m
tab capital, m
* Recode capital == 2 as 1 so that capital works as a 0/1 indicator
replace capital = 1 if capital == 2
sort efua_id
merge efua_id using fua_uc_pop_sh
tab _m
drop _m
sort efua_id
merge efua_id using fua_uc_pop_wgt
tab _m
drop _m
sort efua_id
merge efua_id using temp
tab _m
drop _m
corr p15* fua_p_2015
sum p15* fua_p_2015
* max = largest UC in the FUA (but some UCs are larger than the FUA)
* wgt = weighted pop of all UCs
* sh = using some info on UC pop in 2015 from FUA pop dataset (their team probably corrected the UC pop data in some ways so that the share is always lower than 100)
codebook cntry_name
* 9031 in 188
* Attach the country-level controls; only matched FUAs are kept
sort cntry_name
merge cntry_name using temp_ctrls
tab cntry_name if _m == 2
*tab cntry_name if _m == 1
keep if _m == 3
codebook cntry_name
* 7422 in 115
*collapse (sum) fua_p_2015
* About 3,101,259,268 individuals
* World urban pop 2015 (WUP) = 3,981,498,000
* So 77.9% = 78% 
*keep cntry_name
*bysort cntry_name: keep if _n == 1
*sort cntry_name
*save cntries_capitals_federico, replace
** growth variables 1975-2015 **
sum p*_adj_max, d
foreach X of varlist p*_adj_max {
sum `X' if `X' > 0
}
foreach X of varlist p*_adj_max {
count if `X' != 0
}
* Missing populations are set to 0; l`X' = log(X) is then missing for zeros,
* while l1`X' = log(X+1) is defined for them
foreach X of varlist p*_adj_* {
replace `X' = 0 if `X' == .
gen l`X' = log(`X')
gen l1`X' = log(`X'+1)
}
* Average annual growth 1975-2015 in %: log difference x 100 / 40 years,
* once with log(X) and once (suffix 1) with log(X+1)
foreach X in max sh wgt {
gen gr7515_`X' = (lp15_adj_`X' - lp75_adj_`X')*100/40
gen gr7515_`X'1 = (l1p15_adj_`X' - l1p75_adj_`X')*100/40
sum gr7515_`X' gr7515_`X'1 [w=fua_p_2015]
}
* divided by 40 yrs, we get about 2.4% annual 
** ranking 2015 **
* Rank FUAs within country by 2015 population (1 = largest)
gsort+ cntry_name -fua_p_2015
bysort cntry_name: gen rank15 = _n
tab efua_name if rank15 == 1
* top`X' = 1 for the capital or any of the `X' largest FUAs of the country
foreach X in 1 2 5 {
gen top`X' = (capital == 1 | (rank15 >= 1 & rank15 <= `X'))
}
* top0 = capital only
gen top0 = (capital == 1)

** WITH COUNTRY FE, WE DONT NEED CNTRY CTRLS **
* Table D10: city growth 1975-2015 (log(X+1) version) regressed on top-city
* indicators interacted with the country-level regressors, with country fixed
* effects (absorb) and standard errors clustered by country.
* For every definition of "top" (`T') two specifications are run; only the
* second one (with the 1975 level as control) is exported with outreg2.
*
* Changes relative to the previous version of this section:
*  - keep() listed negchg_19802020, but the deindustrialization variable in
*    this dataset and in the regressions is negchg_19802015.
*  - removed the unused macro lincomset (a copy of lincomset0).
*  - after the first (unexported) regression, the lincom results were stored
*    in macros that were overwritten before being used; proglincom is still
*    called so the log output is unchanged, but nothing is stored.

** GROWTH LHS **
** MAIN SECTORS **
capture erase "tables\tableD10.xls"
capture erase "tables\tableD10.tex"
capture erase "tables\tableD10.txt"
*foreach X in wgt max {
* We use the wgt version 
foreach X in wgt {
foreach T in 1 2 5 0 {
*foreach X in max {
*foreach T in 1 {
** DEINDU **
* Comparisons of the interaction coefficients created by xi
local lincomset0 "_ItopXmfgse_1 - _ItopXnrxme_1"
local lincomset1 "_ItopXmfgse_1 - _ItopXnegch_1"
** LOG X + 1 **
* Log only: this specification is not exported
xi: areg gr7515_`X'1 i.top`T'|nrxmean i.top`T'|mfgservshare i.top`T'|negchg_19802015 i.top`T' [w=fua_p_2015], absorb(country_wb) robust clust(country_wb)
proglincom "`lincomset0'"
proglincom "`lincomset1'"
** LOG X + 1 + CTRL 75 **
xi: areg gr7515_`X'1 l1p75_adj_`X' i.top`T'|nrxmean i.top`T'|mfgservshare i.top`T'|negchg_19802015 i.top`T' [w=fua_p_2015], absorb(country_wb) robust clust(country_wb)
proglincom "`lincomset0'"
local coeff0 = r(coefftxt)
local se0 = r(setxt)
proglincom "`lincomset1'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
outreg2 nrxmean mfgservshare using "tables\tableD10.xls", keep(l1p75_adj_`X' *topX* nrxmean mfgservshare negchg_19802015) addtext(nrxcoef, "`coeff0'", nrxse, "`se0'", deinducoef, "`coeff1'", deinduse, "`se1'", top, `T') se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
}
** DECOMPOSING NRX **
* Same two specifications with nrxmean split into agrimean and minfuelmean
foreach T in 1 2 5 0 {
local lincomset0 "_ItopXmfgse_1 - _ItopXagrim_1"
local lincomset1 "_ItopXmfgse_1 - _ItopXminfu_1"
local lincomset2 "_ItopXmfgse_1 - _ItopXnegch_1"
** LOG X + 1 **
* Log only: this specification is not exported
xi: areg gr7515_`X'1 i.top`T'|agrimean i.top`T'|minfuelmean i.top`T'|mfgservshare i.top`T'|negchg_19802015 i.top`T' [w=fua_p_2015], absorb(country_wb) robust clust(country_wb)
proglincom "`lincomset0'"
proglincom "`lincomset1'"
proglincom "`lincomset2'"
** LOG X + 1 + CTRL **
xi: areg gr7515_`X'1 l1p75_adj_`X' i.top`T'|agrimean i.top`T'|minfuelmean i.top`T'|mfgservshare i.top`T'|negchg_19802015 i.top`T' [w=fua_p_2015], absorb(country_wb) robust clust(country_wb)
proglincom "`lincomset0'"
local coeff0 = r(coefftxt)
local se0 = r(setxt)
proglincom "`lincomset1'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset2'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
outreg2 nrxmean mfgservshare using "tables\tableD10.xls", keep(l1p75_adj_`X' *topX* nrxmean mfgservshare negchg_19802015) addtext(agrcoef, "`coeff0'", agrse, "`se0'", mincoef, "`coeff1'", minse, "`se1'", deinducoef, "`coeff2'", deinduse, "`se2'", top, `T') se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
}
}

***************
***************
**# TABLE D9 #4
***************
***************

***** CROSS-SECTION *****

* Country panel; selected base-year values are copied onto every row of a country.
use LACdata, clear
* Population in 1960, 2010 and 2020
foreach X in 1960 2010 2020 {
bysort ccode: egen pop`X' = max(cond(year == `X', pop, .))
}
* Regression weight: 2020 population, defined on the 2020 rows only
gen pop201020 = pop2020 if year == 2020
* Primacy rate in 1960
bysort ccode: egen primacy_rate1960 = max(cond(year == 1960, primacy_rate, .))
* DEINDUSTRIALIZATION *
* Manufacturing share in the benchmark years, grouped by country_gjv
foreach X in 1960 1980 1990 2020 {
bysort country_gjv: egen indu`X' = max(cond(year == `X', mfgshare, .))
}
sum indu*
* Change in the manufacturing share up to 2020 from each base year
foreach X in 1960 1980 1990 {
gen chg_`X'2020 = indu2020 - indu`X'
}
* negchg_19802020 is the size of the 1980-2020 decline (zero when no decline).
* NOTE(review): a missing chg_19802020 is not < 0 in Stata, so such rows get
* neg = 0 and negchg_19802020 = 0 rather than missing - confirm this is intended.
gen neg = (chg_19802020 < 0)
gen negchg_19802020 = cond(neg == 1, chg_19802020, 0)
replace negchg_19802020 = -negchg_19802020
* Diagnostics: how the alternative base years relate, world and LAC only
corr chg_19802020 chg_19902020 [w=pop] if year == 2020
corr chg_19802020 chg_19902020 [w=pop] if LAC == 1 & year == 2020
corr chg_19802020 chg_19602020 [w=pop] if year == 2020
corr chg_19802020 chg_19602020 [w=pop] if LAC == 1 & year == 2020
sum chg_19802020 chg_19902020 [w=pop] if LAC == 1 & year == 2020
sum negchg_19802020, d
** TYPE OF NRX: AGRICULTURE VS MINERALS/FUELS **
* Country means over all rows in memory, and the 1960 values
foreach V in agri minfuel {
bysort ccode: egen `V'mean = mean(`V'_sh_gdp)
}
foreach V in agri_sh_gdp minfuel_sh_gdp {
bysort ccode: egen `V'1960 = max(cond(year == 1960, `V', .))
}

* Table D9, columns 1-2: cross-sectional regressions of primacy_rate on the
* 2020 rows, weighted by pop201020, with robust standard errors. Output from
* a previous run is deleted first because outreg2 is called with append.
capture erase "tables\tableD9_cols_1_2.xls"
capture erase "tables\tableD9_cols_1_2.tex"
capture erase "tables\tableD9_cols_1_2.txt"
* NRX * 
* Column 1: total natural-resource exports (nrxag_mean2020) next to
* mfgservshare and chg_19802020. The three loops each have a single value and
* only serve to fill the macros X, Z and Y.
foreach X in ag {
foreach Z in 201020 {
foreach Y in 2020 {
* Working copies under fixed names; dropped again at the end of the loop
gen nrxmean = nrx`X'_mean`Y'
gen nrx1960 = nrx`X'_sh_gdp1960
* Differences of coefficients reported as extra rows of the table
local lincomset0 "mfgservshare-nrxmean"
local lincomset1 "mfgservshare-chg_19802020"
* Primacy * 
xi: reg primacy_rate urbrate nrxmean mfgservshare chg_19802020 primacy_rate1960 urbrate1960 nrx1960 mfgserv_ca1960 larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop`Z'] if year == `Y', robust
* proglincom (defined at the top of the file) returns the estimate with
* significance stars in r(coefftxt) and the bracketed s.e. in r(setxt)
proglincom "`lincomset0'"
local coeff0 = r(coefftxt)
local se0 = r(setxt)
proglincom "`lincomset1'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
outreg2 * using "tables\tableD9_cols_1_2.xls", addtext(mfgnrx, "`coeff0'", se0, "`se0'", mfgdeindu, "`coeff1'", se1, "`se1'") keep(urbrate nrxmean mfgservshare chg_19802020) se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
drop nrxmean nrx1960
}
}
}
* DECOMPOSE NRX *
* Column 2: same specification with natural-resource exports split into
* agrimean and minfuelmean (and their 1960 values as controls).
foreach X in ag {
foreach Z in 201020 {
foreach Y in 2020 {
* NOTE(review): nrxmean and nrx1960 are created and dropped here but are not
* used by the regression below.
gen nrxmean = nrx`X'_mean`Y'
gen nrx1960 = nrx`X'_sh_gdp1960
** Deindu **
local lincomset0 "mfgservshare-agrimean"
local lincomset1 "mfgservshare-minfuelmean"
local lincomset2 "mfgservshare-chg_19802020"
* Primacy * 
xi: reg primacy_rate urbrate agrimean minfuelmean mfgservshare chg_19802020 primacy_rate1960 urbrate1960  agri_sh_gdp1960 minfuel_sh_gdp1960 mfgserv_ca1960 larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop`Z'] if year == `Y', robust
proglincom "`lincomset0'"
local coeff0 = r(coefftxt)
local se0 = r(setxt)
proglincom "`lincomset1'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset2'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
outreg2 * using "tables\tableD9_cols_1_2.xls", addtext(mfgagri, "`coeff0'", se0, "`se0'", mfgminfuel, "`coeff1'", se1, "`se1'", mfgdeindu, "`coeff2'", se2, "`se2'") keep(urbrate agrimean minfuelmean mfgservshare chg_19802020) se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
drop nrxmean nrx1960
}
}
}

***** PANEL *****

* Panel version of the Table D9 data: one row per country and retained year.
use LACdata, clear
* Years ending in 5 are removed, then the window is limited to 1960-2020
gen lastyr = substr(string(year),4,1)
tab lastyr
destring lastyr, replace 
drop if lastyr == 5
keep if inrange(year, 1960, 2020)
* 1960 and 2020 population copied to every row of the country
foreach X in 1960 2020 {
bysort ccode: egen pop`X' = max(cond(year == `X', pop, .))
}
gen popt = pop
* Deindustrialization variables *
* Manufacturing share relative to its 1980 level; negchg_1980t keeps only the
* declines observed from 1980 onwards (it stays negative here, no sign flip)
bysort country_gjv: egen indu1980 = max(cond(year == 1980, mfgshare, .))
gen indu = mfgshare
gen chg_1980t = indu - indu1980
gen neg = (chg_1980t < 0)
gen negchg_1980t = cond(neg == 1 & year >= 1980, chg_1980t, 0)
sum negchg_1980t, d
bysort year: sum negchg_1980t
sum mfgservshare negchg_1980t
corr mfgservshare negchg_1980t [w=pop2020]
* -0.46
* Lags 1 to 4 within country, taken by row position after ordering on year.
* NOTE(review): positional lags assume no gaps in a country's retained years.
foreach X in 1 2 3 4 {
foreach V in nrxag_sh_gdp mfgservshare agri_sh_gdp minfuel_sh_gdp negchg_1980t {
bysort ccode (year): gen lag`X'`V' = `V'[_n-`X']
}
}
* One-period leads of the two main regressors
foreach V in nrxag_sh_gdp mfgservshare {
bysort ccode (year): gen lead1`V' = `V'[_n+1]
}
sum primacy_rate
desc agri_sh_gdp minfuel_sh_gdp

** NRX ** 
* Table D9, columns 3-6 (first two columns written here): panel regressions
* of primacy_rate with country fixed effects (absorb(ccode)), year dummies,
* pop2020 weights and standard errors clustered on ccode.

* Remove output from earlier runs because outreg2 is called with append
capture erase "tables\tableD9_cols_3_6.xls"
capture erase "tables\tableD9_cols_3_6.tex"
capture erase "tables\tableD9_cols_3_6.txt"
** ADDING URBRATE **
* 3 LAGS
* Three terms per regressor. Natural-resource exports enter as lags 1-3,
* whereas mfgservshare and negchg_1980t enter as the current value plus
* lags 1-2. The single-value loops only fill the macros C and Z.
foreach C in ag {
foreach Z in 2020 {
* Sums of the three coefficients of each regressor, reported as extra rows
local lincomset0 "lag1nrx`C'_sh_gdp + lag2nrx`C'_sh_gdp + lag3nrx`C'_sh_gdp"
local lincomset1 "mfgservshare + lag1mfgservshare + lag2mfgservshare"
local lincomset2 "negchg_1980t + lag1negchg_1980t + lag2negchg_1980t"
xi: areg primacy_rate urbrate lag1nrx`C'_sh_gdp lag2nrx`C'_sh_gdp lag3nrx`C'_sh_gdp mfgservshare lag1mfgservshare lag2mfgservshare negchg_1980t lag1negchg_1980t lag2negchg_1980t lpop lpop_sq i.year [w=pop`Z'] if year <= 2020, robust absorb(ccode) clust(ccode)
proglincom "`lincomset0'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset1'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
proglincom "`lincomset2'"
local coeff3 = r(coefftxt)
local se3 = r(setxt)
outreg2 * using "tables\tableD9_cols_3_6.xls", keep(urbrate *nrx* *mfgservshare* *negchg_1980t) addtext(nrxcoef, "`coeff1'", nrxse, "`se1'", mfgservcoef, "`coeff2'", mfgservse, "`se2'", deinducoef, "`coeff3'", deinduse, "`se3'") se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
* Tests of the difference between the summed effects; shown in the log only,
* not written to the table
lincom "(mfgservshare + lag1mfgservshare + lag2mfgservshare)-(lag1nrx`C'_sh_gdp + lag2nrx`C'_sh_gdp + lag3nrx`C'_sh_gdp)"
lincom "(mfgservshare + lag1mfgservshare + lag2mfgservshare)-(negchg_1980t + lag1negchg_1980t + lag2negchg_1980t)"
}
}
* 4 LAGS
* Same specification with one more term per regressor
foreach C in ag {
foreach Z in 2020 {
local lincomset0 "lag1nrx`C'_sh_gdp + lag2nrx`C'_sh_gdp + lag3nrx`C'_sh_gdp + lag4nrx`C'_sh_gdp"
local lincomset1 "mfgservshare + lag1mfgservshare + lag2mfgservshare + lag3mfgservshare"
local lincomset2 "negchg_1980t + lag1negchg_1980t + lag2negchg_1980t + lag3negchg_1980t"
xi: areg primacy_rate urbrate lag1nrx`C'_sh_gdp lag2nrx`C'_sh_gdp lag3nrx`C'_sh_gdp lag4nrx`C'_sh_gdp mfgservshare lag1mfgservshare lag2mfgservshare lag3mfgservshare negchg_1980t lag1negchg_1980t lag2negchg_1980t lag3negchg_1980t lpop lpop_sq i.year [w=pop`Z'] if year <= 2020, robust absorb(ccode) clust(ccode)
proglincom "`lincomset0'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset1'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
proglincom "`lincomset2'"
local coeff3 = r(coefftxt)
local se3 = r(setxt)
outreg2 * using "tables\tableD9_cols_3_6.xls", keep(urbrate *nrx* *mfgservshare* *negchg_1980t) addtext(nrxcoef, "`coeff1'", nrxse, "`se1'", mfgservcoef, "`coeff2'", mfgservse, "`se2'", deinducoef, "`coeff3'", deinduse, "`se3'") se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
* Log-only tests of the difference between the summed effects
lincom "(mfgservshare + lag1mfgservshare + lag2mfgservshare + lag3mfgservshare)-(lag1nrx`C'_sh_gdp + lag2nrx`C'_sh_gdp + lag3nrx`C'_sh_gdp + lag4nrx`C'_sh_gdp)"
lincom "(mfgservshare + lag1mfgservshare + lag2mfgservshare + lag3mfgservshare)-(negchg_1980t + lag1negchg_1980t + lag2negchg_1980t + lag3negchg_1980t)"
}
}

** MIN-FUEL VS CASH ** 
* Remaining columns of tableD9_cols_3_6.xls: the same panel regressions with
* natural-resource exports split into agri_sh_gdp and minfuel_sh_gdp.
* NOTE(review): the loop macro C is not referenced inside these two loops;
* only Z (the weight year) is used.
** ADDING URBRATE **
* 3 LAGS
foreach C in ag {
foreach Z in 2020 {
* Sums of coefficients: lags 1-3 for the two export shares, current value
* plus lags 1-2 for mfgservshare and negchg_1980t
local lincomset0 "lag1agri_sh_gdp + lag2agri_sh_gdp + lag3agri_sh_gdp"
local lincomset1 "lag1minfuel_sh_gdp + lag2minfuel_sh_gdp + lag3minfuel_sh_gdp"
local lincomset2 "mfgservshare + lag1mfgservshare + lag2mfgservshare"
local lincomset3 "negchg_1980t + lag1negchg_1980t + lag2negchg_1980t"
xi: areg primacy_rate urbrate lag1agri_sh_gdp lag2agri_sh_gdp lag3agri_sh_gdp lag1minfuel_sh_gdp lag2minfuel_sh_gdp lag3minfuel_sh_gdp mfgservshare lag1mfgservshare lag2mfgservshare negchg_1980t lag1negchg_1980t lag2negchg_1980t lpop lpop_sq i.year [w=pop`Z'] if year <= 2020, robust absorb(ccode) clust(ccode)
proglincom "`lincomset0'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset1'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
proglincom "`lincomset2'"
local coeff3 = r(coefftxt)
local se3 = r(setxt)
proglincom "`lincomset3'"
local coeff4 = r(coefftxt)
local se4 = r(setxt)
outreg2 * using "tables\tableD9_cols_3_6.xls", keep(urbrate *lag* *mfgservshare* *negchg_1980t) addtext(agricoef, "`coeff1'", agrise, "`se1'", fuelmincoef, "`coeff2'", fuelminse, "`se2'", mfgservcoef, "`coeff3'", mfgservse, "`se3'", deinducoef, "`coeff4'", deinduse, "`se4'") se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
* Log-only tests of mfg+services against each of the other summed effects
lincom "(mfgservshare + lag1mfgservshare + lag2mfgservshare)-(lag1minfuel_sh_gdp + lag2minfuel_sh_gdp + lag3minfuel_sh_gdp)"
lincom "(mfgservshare + lag1mfgservshare + lag2mfgservshare)-(lag1agri_sh_gdp + lag2agri_sh_gdp + lag3agri_sh_gdp)"
lincom "(mfgservshare + lag1mfgservshare + lag2mfgservshare)-(negchg_1980t + lag1negchg_1980t + lag2negchg_1980t)"
}
}
* 4 LAGS
* Same specification with one more term per regressor
foreach C in ag {
foreach Z in 2020 {
local lincomset0 "lag1agri_sh_gdp + lag2agri_sh_gdp + lag3agri_sh_gdp + lag4agri_sh_gdp"
local lincomset1 "lag1minfuel_sh_gdp + lag2minfuel_sh_gdp + lag3minfuel_sh_gdp + lag4minfuel_sh_gdp"
local lincomset2 "mfgservshare + lag1mfgservshare + lag2mfgservshare + lag3mfgservshare"
local lincomset3 "negchg_1980t + lag1negchg_1980t + lag2negchg_1980t + lag3negchg_1980t"
xi: areg primacy_rate urbrate lag1agri_sh_gdp lag2agri_sh_gdp lag3agri_sh_gdp lag4agri_sh_gdp lag1minfuel_sh_gdp lag2minfuel_sh_gdp lag3minfuel_sh_gdp lag4minfuel_sh_gdp mfgservshare lag1mfgservshare lag2mfgservshare lag3mfgservshare negchg_1980t lag1negchg_1980t lag2negchg_1980t lag3negchg_1980t lpop lpop_sq i.year [w=pop`Z'] if year <= 2020, robust absorb(ccode) clust(ccode)
proglincom "`lincomset0'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset1'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
proglincom "`lincomset2'"
local coeff3 = r(coefftxt)
local se3 = r(setxt)
proglincom "`lincomset3'"
local coeff4 = r(coefftxt)
local se4 = r(setxt)
outreg2 * using "tables\tableD9_cols_3_6.xls", keep(urbrate *lag* *mfgservshare* *negchg_1980t) addtext(agricoef, "`coeff1'", agrise, "`se1'", fuelmincoef, "`coeff2'", fuelminse, "`se2'", mfgservcoef, "`coeff3'", mfgservse, "`se3'", deinducoef, "`coeff4'", deinduse, "`se4'") se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
* Log-only tests of mfg+services against each of the other summed effects
lincom "(mfgservshare + lag1mfgservshare + lag2mfgservshare + lag3mfgservshare)-(lag1minfuel_sh_gdp + lag2minfuel_sh_gdp + lag3minfuel_sh_gdp + lag4minfuel_sh_gdp)"
lincom "(mfgservshare + lag1mfgservshare + lag2mfgservshare + lag3mfgservshare)-(lag1agri_sh_gdp + lag2agri_sh_gdp + lag3agri_sh_gdp + lag4agri_sh_gdp)"
lincom "(mfgservshare + lag1mfgservshare + lag2mfgservshare + lag3mfgservshare)-(negchg_1980t + lag1negchg_1980t + lag2negchg_1980t + lag3negchg_1980t)"
}
}

*****************
*****************
**# TABLE D11 #5
*****************
*****************

** Country info **
* Country-level controls for the city panel, saved as temp_ctrls with a
* country key (cntry_name) spelled the way the city data spell it.

use LACdata, clear
* The city data cover 1975, 1990, 2000 and 2015; 1960 is kept as well
keep if inlist(year, 1960, 1975, 1990, 2000, 2015)
keep country_wb year mfgservshare upop nrxag_sh_gdp agri_sh_gdp minfuel_sh_gdp mfgshare country_gjv
* Key = World Bank name with blanks and hyphens removed ...
gen cntry_name = country_wb
replace cntry_name = subinstr(subinstr(cntry_name, " ", "",.), "-", "",.)
* ... followed by one-off respellings where the two sources differ
replace cntry_name = "Bahamas" if cntry_name == "Bahamas,The"
replace cntry_name = "Brunei" if cntry_name == "BruneiDarussalam"
replace cntry_name = "CapeVerde" if cntry_name == "CaboVerde"
replace cntry_name = "CotedIvoire" if cntry_name == "Coted'Ivoire"
replace cntry_name = "CotedIvoire" if cntry_name == "Côted'Ivoire"
replace cntry_name = "DemocraticRepublicoftheCongo" if cntry_name == "Congo,Dem.Rep."
replace cntry_name = "Egypt" if cntry_name == "Egypt,ArabRep."
replace cntry_name = "Gambia" if cntry_name == "Gambia,The"
replace cntry_name = "HongKong" if cntry_name == "HongKongSAR,China"
replace cntry_name = "Iran" if cntry_name == "Iran,IslamicRep."
replace cntry_name = "Kyrgyzstan" if cntry_name == "KyrgyzRepublic"
replace cntry_name = "Laos" if cntry_name == "LaoPDR"
replace cntry_name = "Macao" if cntry_name == "MacaoSAR,China"
replace cntry_name = "Macedonia" if cntry_name == "NorthMacedonia"
replace cntry_name = "NorthKorea" if cntry_name == "Korea,Dem.People’sRep."
replace cntry_name = "Palestina" if cntry_name == "WestBankandGaza"
replace cntry_name = "RepublicofCongo" if cntry_name == "Congo,Rep."
replace cntry_name = "Russia" if cntry_name == "RussianFederation"
replace cntry_name = "Slovakia" if cntry_name == "SlovakRepublic"
replace cntry_name = "SouthKorea" if cntry_name == "Korea,Rep."
replace cntry_name = "Swaziland" if cntry_name == "Eswatini"
replace cntry_name = "Syria" if cntry_name == "SyrianArabRepublic"
replace cntry_name = "Venezuela" if cntry_name == "Venezuela,RB"
replace cntry_name = "Yemen" if cntry_name == "Yemen,Rep."
* Left unmapped in the original (placeholders with an empty source name):
* FrenchGuiana, Guadeloupe, Martinique, Mayotte, Reunion, Taiwan, WesternSahara
sort cntry_name year
save temp_ctrls, replace
tab year
* 116 * 5 
codebook cntry_name
* 116

** CITY-LEVEL DATA **
* The full list of FUAs is read from fuas.csv and stored as fuas.dta
* (answers: how many FUAs are there in the world in 2015?)
import delimited "fuas.csv", clear
sort efua_id
save fuas, replace
count
codebook efua_id
* 9031
* 6830/9031 = 75.6

* 2015 population of every FUA, used to compute the share of the world FUA
* population that we cover. The data in memory are exactly fuas.dta, so they
* are not reloaded. fua_p_2015 arrives as text with thousands separators.
keep efua_id fua_p_2015
replace fua_p_2015 = subinstr(fua_p_2015, ",", "",.) 
destring fua_p_2015, replace
count
* 9031
sort efua_id
save temp, replace

** City panel **
* One row per FUA and year, holding the three population series
* (max, sh, wgt), the 2015 FUA population and the capital indicator.

use "fua_uc_pop_max.dta", clear
* Attach the remaining FUA-level files one after the other, keyed on efua_id
foreach F in fua_uc_pop_sh fua_uc_pop_wgt temp capitals {
sort efua_id
merge efua_id using `F'
tab _m
drop _m
}
corr p15* fua_p_2015
sum p15* fua_p_2015
* Rename p<yy>_adj_<series> to p<series><yy> so that reshape can use the
* suffix as the year; 00 is written as 2000 straight away
foreach Z in max sh wgt {
ren p75_adj_`Z' p`Z'75
ren p90_adj_`Z' p`Z'90
ren p00_adj_`Z' p`Z'2000
ren p15_adj_`Z' p`Z'15
}
reshape long pmax psh pwgt, i(cntry_name efua_id efua_name fua_p_2015) j(year)
tab year
* Expand the remaining two-digit suffixes to calendar years
recode year (75 = 1975) (90 = 1990) (15 = 2015)
sort cntry_name efua_id efua_name year
tab year
* 9031
save temp2, replace

** We merge **
* A 1960 row is added for every FUA (a copy of its 1975 row with the
* population series blanked) so that the 1960 country controls have a row
* to attach to; the country controls are then merged in.

use temp2, clear
keep if year == 1975
replace year = 1960
foreach P in pmax psh pwgt {
replace `P' = .
}
append using temp2
sort efua_id efua_name cntry_name year
sort cntry_name year 
merge cntry_name year using temp_ctrls
* Countries with controls but no FUA rows
tab cntry_name if _m == 2
* Only country-years present in both sources are kept
keep if _m == 3
drop _m
codebook cntry_name
* 115

* Manufacturing share in the two candidate base years, by country_gjv
foreach X in 1975 1990 {
bysort country_gjv: egen indu`X' = max(cond(year == `X', mfgshare, .))
}
gen indu = mfgshare
* Change relative to each base year; negchg keeps only the declines observed
* from the base year onwards and is zero otherwise (values stay negative).
* A missing change is not < 0 in Stata, so it also yields zero.
foreach X in 1975 1990 {
gen chg_`X't = indu - indu`X'
gen negchg_`X't = cond(chg_`X't < 0 & year >= `X', chg_`X't, 0)
sum negchg_`X't, d
bysort year: sum negchg_`X't
sum mfgservshare negchg_`X't
}

* Lags 1 to 4 of the country-level regressors, taken within each city by row
* position after ordering on year
foreach X in 1 2 3 4 {
foreach V in nrxag_sh_gdp mfgservshare agri_sh_gdp minfuel_sh_gdp negchg_1975t negchg_1990t {
bysort cntry_name efua_id (year): gen lag`X'`V' = `V'[_n-`X']
}
}

* Outcomes: missing populations are set to zero, then log(pop) and
* log(pop + 1) are created for each of the three population series
foreach X of varlist pmax psh pwgt {
replace `X' = 0 if `X' == .
gen l`X' = log(`X')
gen l1`X' = log(`X'+1)
} 
** ranking 2015 **
* Rank of each FUA within its country-year by 2015 population (1 = largest)
gsort+ cntry_name year -fua_p_2015
bysort cntry_name year: gen rank15 = _n
tab efua_name if rank15 == 1
* topK = capital city or among the K largest FUAs; top0 = capital only
foreach X in 1 2 5 {
gen top`X' = (capital == 1 | (rank15 >= 1 & rank15 <= `X'))
}
gen top0 = (capital == 1)
* One-period lag of log(pop + 1) for the same city.
* FIX: the lag used to be taken with bysort cntry_name only, after sorting on
* cntry_name year, so the previous row was generally a different city of the
* same country. It is now taken within city, ordered by year, like the other
* lags of this panel.
foreach X of varlist pmax psh pwgt {
bysort cntry_name efua_id (year): gen lag11l1`X' = l1`X'[_n-1]
}
* Country-by-year identifier (string), used as i.codeyr in the regressions
egen code = group(cntry_name)
gen codeyr = string(code)+string(year)
set matsize 10000
tab year if year >= 1975

* REGRESSIONS *
* Table D11: city-level panel regressions of l1pwgt, i.e. log(pwgt + 1), on
* top-city indicators interacted with country-level regressors, with city
* fixed effects (absorb(efua_id)), country-by-year dummies (i.codeyr),
* fua_p_2015 weights and standard errors clustered on cntry_name.
* D is the base year of the deindustrialization measure negchg_<D>t and N
* selects the top-city indicator (top1, top2, top5, top0).
* Output from earlier runs is removed because outreg2 is called with append.
capture erase "tables\tableD11.xls"
capture erase "tables\tableD11.tex"
capture erase "tables\tableD11.txt"
foreach D in 1975 1990 {
* 1 LAG * 
foreach X of varlist pwgt {
foreach N in 1 2 5 0 {
* Interaction coefficients to report; the names are those generated by xi
local lincomset0 "(_ItopXmfgse_1)"
local lincomset1 "(_ItopXlag1n_1)"
local lincomset2 "(_ItopXnegch_1)"
xi: areg l1`X' i.top`N'|lag1nrxag_sh_gdp i.top`N'|mfgservshare i.top`N'|negchg_`D't i.codeyr [w=fua_p_2015], robust absorb(efua_id) clust(cntry_name)
proglincom "`lincomset0'"
local coeff0 = r(coefftxt)
local se0 = r(setxt)
proglincom "`lincomset1'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset2'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
* NOTE(review): mfg_sh_x is not created anywhere in this part of the file
outreg2 lag1nrx* mfg_sh_x using "tables\tableD11.xls", keep(*topX*) addtext(yearused, `D', mfgservcoef, "`coeff0'", mfgservse, "`se0'", nrxcoef, "`coeff1'", nrxse, "`se1'", deinducoef, "`coeff2'", deinduse, "`se2'") se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
}
}
* 2 LAGs * 
* Sums over the two terms of each regressor. The second negchg interaction
* shares its xi stub with the lag1nrx one, hence the name _ItopXlag1na1.
local lincomset0 "(_ItopXmfgse_1+_ItopXlag1m_1)"
local lincomset1 "(_ItopXlag1n_1+_ItopXlag2n_1)"
local lincomset2 "(_ItopXnegch_1+_ItopXlag1na1)"
foreach X of varlist pwgt {
foreach N in 1 2 5 0 {
* FIX: the lagged deindustrialization term now follows the base year D.
* It used to be lag1negchg_1975t for both base years, which paired
* negchg_1990t with the lag of negchg_1975t when D was 1990.
xi: areg l1`X' i.top`N'|lag1nrxag_sh_gdp i.top`N'|mfgservshare i.top`N'|lag2nrxag_sh_gdp i.top`N'|lag1mfgservshare i.top`N'|negchg_`D't i.top`N'|lag1negchg_`D't i.codeyr [w=fua_p_2015], robust absorb(efua_id) clust(cntry_name)
proglincom "`lincomset0'"
local coeff0 = r(coefftxt)
local se0 = r(setxt)
proglincom "`lincomset1'"
local coeff1 = r(coefftxt)
local se1 = r(setxt)
proglincom "`lincomset2'"
local coeff2 = r(coefftxt)
local se2 = r(setxt)
outreg2 lag1nrx* mfg_sh_x using "tables\tableD11.xls", keep(*topX*) addtext(yearused, `D', mfgservcoef, "`coeff0'", mfgservse, "`se0'", nrxcoef, "`coeff1'", nrxse, "`se1'", deinducoef, "`coeff2'", deinduse, "`se2'") se nocons coefastr bdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
}
}
}

****************
**# TABLE D16 #1
**************** 

*** GINI OF THE PRODUCTION CITYNESS INDEX ***

* list_ginis is created in the folder "Mapping"; here it is only re-saved
* sorted by country_wb so that it can be merged later
use list_ginis, clear
sum gini, d
sort country_wb
save list_ginis, replace
count
* 74

*** REGIONAL INEQUALITY BASED ON PRIMACY OF NIGHT LIGHTS WITHIN THE GHS BOUNDARIES ***

* Primacy rates from night lights ca. 2000, 2010 or 2011: the share of a
* country's summed lights that falls in its top 1, 2 or 5 cities
* See the folder "Other Files\Lights" for how the file lights is created
foreach Y in 00 10 11 {
use lights, clear
* Rank cities within country, brightest first
gsort cntry_name -sumntlrc`Y'
bysort cntry_name: gen rank = _n
* Lights of the top-K cities only (missing for the others) ...
foreach X of numlist 1 2 5 {
gen ntl`X' = sumntlrc`Y' if rank <= `X'
}
* ... and of all cities; the cut-off of 2000 is meant to exceed any country's
* number of cities
gen ntlall = sumntlrc`Y' if rank <= 2000
collapse (sum) ntl*, by(cntry_name)
foreach X of numlist 1 2 5 {
gen ntlprim`X'_`Y' = ntl`X'/ntlall*100
sum ntlprim`X'_`Y', d
}
sort cntry_name
save ntlprim_`Y', replace
}
* We combine the three vintages
use ntlprim_00, clear
foreach S in 10 11 {
sort cntry_name
merge cntry_name using ntlprim_`S'
tab _m
drop _m
}
drop ntl1 ntl2 ntl5 ntlall
* We check the correlations - very high
foreach X of numlist 1 2 5 {
corr ntlprim`X'_*
}
sort cntry_name
save citylightsprim, replace 

*** REGIONAL INEQUALITY BASED ON PRIMACY OF POP FROM GHS ***

** Primacy rates based on population: share of a country's FUA population
** living in its top 1, 2 or 5 cities, for 2000 and 2015 **
* NOTE(review): the original comment says the wgt version is used, to be
* consistent with the Table D10 regressions, but the code reads the max
* series (fua_uc_pop_max, p<yy>_adj_max) - confirm which one is intended.
foreach Y in 00 15 {
use "fua_uc_pop_max.dta", clear
* Rank cities within country, largest first
gsort cntry_name -p`Y'_adj_max
bysort cntry_name: gen rank = _n
foreach X of numlist 1 2 5 {
gen pop`X' = p`Y'_adj_max if rank <= `X'
}
* All cities; the cut-off of 2000 is meant to exceed any country's count
gen popall = p`Y'_adj_max if rank <= 2000
collapse (sum) pop*, by(cntry_name)
foreach X of numlist 1 2 5 {
gen popprim`X'_`Y' = pop`X'/popall*100
sum popprim`X'_`Y', d
}
sort cntry_name
save popprim_`Y', replace
}
* We combine the two years
use popprim_00, clear
sort cntry_name
merge cntry_name using popprim_15
tab _m
drop _m
* We check the correlations - very high
foreach X of numlist 1 2 5 {
corr popprim`X'_*
}
sort cntry_name
save citypopprim, replace 

** We combine with the night lights one **

use citylightsprim, clear
sort cntry_name
merge cntry_name using citypopprim
tab _m
drop _m
foreach X of numlist 1 2 5 {
corr popprim`X'_* ntlprim`X'_*
}
* Very high throughout * 
sort cntry_name
save ntlpopprim, replace

*** REGIONAL INEQUALITY BASED ON GINI OF NIGHT LIGHTS WITHIN THE GHS BOUNDARIES ***

* Three lookup files are built from two csv files created in the folder
* "Other Files\Lights":
*   FUA_admin1 - the admin-1 unit (name_1) of each FUA
*   tempadmin  - one GHS country name (cntry_name) per admin country (name_0)
*   admin1     - the list of admin-1 units per GHS country
* FUA_admin1.csv is imported once and reused for the first two files.
import delimited "FUA_admin1.csv", clear 
preserve
keep efua_id cntry_name name_1
sort efua_id
save FUA_admin1, replace
restore

* One row per admin country
keep name_0 cntry_name
bysort name_0: keep if _n == 1
count
* 187
sort name_0
save tempadmin, replace

import delimited "admin1.csv", clear 
* We add the country name in GHS
sort name_0
merge name_0 using tempadmin
tab _m
tab name_0 if _m == 1
* These are not in the GHS data set so we can ignore 
keep if _m != 1
drop _m
keep cntry_name name_1
* Only the two key variables are left, so dropping exact duplicates keeps one
* row per country and admin-1 unit
duplicates drop
sort cntry_name name_1
save admin1, replace
count
* 3260

* Gini of night lights across the admin-1 units of each country, for the
* 2000, 2010 and 2011 light series, saved as ginintlrc.
* CHANGES relative to the earlier version:
*  - the admin-1 level data set does not depend on the year or the country,
*    so it is built once instead of inside every loop iteration;
*  - the number of countries is read from the data instead of being fixed
*    at 188.
use lights, clear
egen countrynum = group(cntry_name)
codebook countrynum
save temp, replace
* We add the admin-1 unit
sort efua_id
merge efua_id using FUA_admin1
tab _m
drop if _m == 2
drop _m
* We collapse at the admin-1 level
collapse (sum) sumntlrc*, by(cntry_name countrynum name_1)
* We add the admin-1 units without any FUA and give them zero lights
sort cntry_name name_1
merge cntry_name name_1 using admin1
tab _m
drop _m
foreach V of varlist sumntlrc* {
replace `V' = 0 if `V' == .
}
* Rows coming only from admin1 have no country number yet
bysort cntry_name: egen maxcountrynum = max(countrynum)
replace countrynum = maxcountrynum
drop maxcountrynum
summarize countrynum, meanonly
local ncountries = r(max)
tempfile ntl_admin1
save `ntl_admin1'
foreach Y in 00 10 11 {
forvalues X = 1/`ncountries' {
use `ntl_admin1', clear
keep if countrynum==`X'
gen gini = .
* NOTE(review): ineqdeco is documented as leaving non-positive values out of
* the calculation, so the zero-filled units may not count towards the Gini;
* ineqdec0 is the variant that includes them - confirm which is intended.
ineqdeco sumntlrc`Y'
replace gini = $S_gini
keep cntry_name countrynum gini
ren gini ginintlrc`Y'
bysort cntry_name: keep if _n == 1
save temp`X', replace
}
* We combine them
use temp1, clear
forvalues X = 2/`ncountries' {
append using temp`X'
}
sort cntry_name
save ginintlrc`Y', replace
}
* We combine the three series *
use ginintlrc00, clear
sort cntry_name
merge cntry_name using ginintlrc10
tab _m
drop _m
sort cntry_name
merge cntry_name using ginintlrc11
tab _m
drop _m
drop countrynum
corr ginintlrc*
sort cntry_name
save ginintlrc, replace 

*** REGIONAL INEQUALITY BASED ON GINI OF POP OF THE GHS BOUNDARIES ***

* Gini of FUA population across the admin-1 units of each country, for 2000
* and 2015 (max series), saved as ginipop.
* CHANGES relative to the earlier version:
*  - the admin-1 level data set does not depend on the year or the country,
*    so it is built once instead of inside every loop iteration;
*  - the number of countries is read from the data instead of being fixed
*    at 188.
use fua_uc_pop_max.dta, clear
egen countrynum = group(cntry_name)
codebook countrynum
save temp, replace
* We add the admin-1 unit
sort efua_id
merge efua_id using FUA_admin1
tab _m
drop if _m == 2
drop _m
* We collapse at the admin-1 level
collapse (sum) p*_adj_max, by(cntry_name countrynum name_1)
* We add the admin-1 units without any FUA and give them zero population
sort cntry_name name_1
merge cntry_name name_1 using admin1
tab _m
drop _m
foreach V of varlist p*_adj_max {
replace `V' = 0 if `V' == .
}
* Rows coming only from admin1 have no country number yet
bysort cntry_name: egen maxcountrynum = max(countrynum)
replace countrynum = maxcountrynum
drop maxcountrynum
summarize countrynum, meanonly
local ncountries = r(max)
tempfile pop_admin1
save `pop_admin1'
foreach Y in 00 15 {
forvalues X = 1/`ncountries' {
use `pop_admin1', clear
keep if countrynum==`X'
gen gini = .
* NOTE(review): ineqdeco is documented as leaving non-positive values out of
* the calculation, so the zero-filled units may not count towards the Gini;
* ineqdec0 is the variant that includes them - confirm which is intended.
ineqdeco p`Y'_adj_max
replace gini = $S_gini
keep cntry_name countrynum gini
ren gini ginipop`Y'
bysort cntry_name: keep if _n == 1
save temp`X', replace
}
* We combine them
use temp1, clear
forvalues X = 2/`ncountries' {
append using temp`X'
}
sort cntry_name
save ginipop`Y', replace
}
* We combine the two years *
use ginipop00, clear
sort cntry_name
merge cntry_name using ginipop15
tab _m
drop _m
drop countrynum
corr ginipop*
sort cntry_name
save ginipop, replace 

***** ANALYSIS *****

* Country panel with the production-cityness Gini attached and the
* base-year variables used by the regressions that follow.
use LACdata, clear
* WE ADD THE GINIS * 
sort country_wb
merge country_wb using list_ginis
tab _m
tab country_wb if _m == 1
* Countries without a Gini stay in the data; Gini-only rows are dropped
drop if _m == 2
drop _m
* Gini rescaled by 100
replace gini = gini*100
** MFGSERV1960 **
bysort ccode: egen mfgservshare1960 = max(cond(year == 1960, mfgservshare, .))
** POP **
* Base-year values copied to every row of the country
foreach X in 1960 2000 2010 2020 {
bysort ccode: egen pop`X' = max(cond(year == `X', pop, .))
}
bysort ccode: egen upop2020 = max(cond(year == 2020, upop, .))
* Weights: a constant, and own-year population on the 2010 and 2020 rows
gen pop1 = 1
gen pop201020 = cond(year == 2010, pop2010, cond(year == 2020, pop2020, .))
sum pop201020 pop1960 pop1
foreach X in 1960 2000 2010 2020 {
bysort ccode: egen lpcgdp`X' = max(cond(year == `X', lpcgdp, .))
}
** DEINDUSTRIALIZATION **
foreach X in 1980 2000 2020 {
bysort country_gjv: egen indu`X' = max(cond(year == `X', mfgshare, .))
}
* Size of the decline in the manufacturing share since 1980 (zero when there
* is no decline; a missing change is not < 0 in Stata and also yields zero)
foreach X in 2000 2020 {
gen chg_1980`X' = indu`X' - indu1980
gen negchg_1980`X' = cond(chg_1980`X' < 0, chg_1980`X', 0)
replace negchg_1980`X' = -negchg_1980`X'
sum negchg_1980`X', d
}
* We create MFG+FIRE (missing whenever either component is missing)
gen mfgfireshare = mfgshare+fire2020_ma5_un

*** DIFFERENT PATHS AND THE GINI AS A DEPENDENT VARIABLE ***
* We use the same regressions as for urban employment
* For the gini in 2000 we run two specifications (without and with urbrate as
* a control), test two linear combinations with proglincom and append both
* columns to tables/table_gini.xls
* NOTE(review): the table_gini files are erased and rebuilt further down in this file, so the table written here does not survive a full run
capture erase "tables/table_gini.xls"
capture erase "tables/table_gini.tex"
capture erase "tables/table_gini.txt"
local lincomset0 "nrxmean - mfgservshare"
local lincomset1 "negchg_19802000 - mfgservshare"
foreach V of varlist gini {
	foreach Y in 2000 {
		foreach X in ag {
			gen nrxmean = nrx`X'_mean`Y'
			* withurb = 0: no urbrate ctrl; withurb = 1: urbrate ctrl only
			forvalues withurb = 0/1 {
				local urbctrl ""
				if `withurb' local urbctrl "urbrate"
				xi: reg `V' nrxmean mfgservshare negchg_19802000 `urbctrl' larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop`Y'] if year == `Y', robust
				proglincom "`lincomset0'"
				local coeff1 = r(coefftxt)
				local se1 = r(setxt)
				proglincom "`lincomset1'"
				local coeff2 = r(coefftxt)
				local se2 = r(setxt)
				* No third linear combination is computed in this section, so the diffcoef and diffse
				* rows are written as explicit blanks instead of reading coeff3 and se3, which are
				* never set here and could carry values left over from an earlier table
				outreg2 * using "tables/table_gini.xls", keep(*mfgserv* *nrx* negchg_*) addtext(NRXGDP-MFGSERV, "`coeff1'", se1, "`se1'", DEINDU-MFGSERV, "`coeff2'", se2, "`se2'", diffcoef, "", diffse, "") se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
			}
			drop nrxmean
		}
	}
}
*** WITH MFGFIRE ***
* Same two specifications as the gini table, with mfgfireshare in place of
* mfgservshare; both columns are appended to tables/table_gini_robfire.xls
capture erase "tables/table_gini_robfire.xls"
capture erase "tables/table_gini_robfire.tex"
capture erase "tables/table_gini_robfire.txt"
local lincomset0 "nrxmean - mfgfireshare"
local lincomset1 "negchg_19802000 - mfgfireshare"
foreach V of varlist gini {
	foreach Y in 2000 {
		foreach X in ag {
			gen nrxmean = nrx`X'_mean`Y'
			* withurb = 0: no urbrate ctrl; withurb = 1: urbrate ctrl only
			forvalues withurb = 0/1 {
				local urbctrl ""
				if `withurb' local urbctrl "urbrate"
				xi: reg `V' nrxmean mfgfireshare negchg_19802000 `urbctrl' larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop`Y'] if year == `Y', robust
				proglincom "`lincomset0'"
				local coeff1 = r(coefftxt)
				local se1 = r(setxt)
				proglincom "`lincomset1'"
				local coeff2 = r(coefftxt)
				local se2 = r(setxt)
				* No third linear combination is computed in this section, so the diffcoef and diffse
				* rows are written as explicit blanks instead of reading coeff3 and se3, which are
				* never set here and could carry values left over from an earlier table
				outreg2 * using "tables/table_gini_robfire.xls", keep(*mfgfire* *nrx* negchg_*) addtext(NRXGDP-MFGFIRE, "`coeff1'", se1, "`se1'", DEINDU-MFGFIRE, "`coeff2'", se2, "`se2'", diffcoef, "", diffse, "") se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
			}
			drop nrxmean
		}
	}
}

*** DIFFERENT PATHS AND THE MAIN PRIMACY RATE AS A DEPENDENT VARIABLE ***
* Note that the specification for the primacy rate is slightly different as we can control for primacy in 1960
* Primacy * 
*xi: reg primacy_rate urbrate nrxmean mfgservshare chg_19802020 primacy_rate1960 urbrate1960 nrx1960 mfgserv_ca1960 larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop`Z'] if year == `Y', robust
* Both times, we check it conditional on the urbanization rate
* We investigate the correlation between the primacy rate (in pop - "quantity") and the Gini index (in types of employment - "quality")
* Unweighted, year by year:
foreach yr in 2020 2010 2000 {
	correlate primacy_rate gini if year == `yr'
}
* Correlation about -0.5 (-0.55 for 2000 since gini centered around 2000)
* Higher primacy rate = urban pop less scattered
* Higher gini = urban employment more diversified across cities
* A negative correlation makes sense. 
* But not for the largest countries (-0.29 or -0.36 depending on which pop weights are used - total urban pop or total pop)
* Weighted by 2020 urban population, then by 2020 total population, for 2020 and 2000:
foreach yr in 2020 2000 {
	foreach wgt in upop2020 pop2020 {
		correlate primacy_rate gini [w=`wgt'] if year == `yr'
	}
}

*** PRIMACY RATES OR GINI INDEXES BASED ON POP OR LIGHTS ***
* These are created above and we merge them with the data here
codebook country_wb
* 116
* We build the merge key cntry_name from country_wb: spaces and hyphens are
* removed, then individual names are recoded to the spelling used in the files
* created above
generate cntry_name = country_wb
replace cntry_name = subinstr(cntry_name, " ", "",.) 
replace cntry_name = subinstr(cntry_name, "-", "",.) 
replace cntry_name = "Bahamas"                      if cntry_name == "Bahamas,The"
replace cntry_name = "Brunei"                       if cntry_name == "BruneiDarussalam"
replace cntry_name = "CapeVerde"                    if cntry_name == "CaboVerde"
* Two spellings of the same country (with and without the accented o)
replace cntry_name = "CotedIvoire"                  if cntry_name == "Coted'Ivoire"
replace cntry_name = "CotedIvoire"                  if cntry_name == "Côted'Ivoire"
replace cntry_name = "DemocraticRepublicoftheCongo" if cntry_name == "Congo,Dem.Rep."
replace cntry_name = "Egypt"                        if cntry_name == "Egypt,ArabRep."
replace cntry_name = "Gambia"                       if cntry_name == "Gambia,The"
replace cntry_name = "HongKong"                     if cntry_name == "HongKongSAR,China"
replace cntry_name = "Iran"                         if cntry_name == "Iran,IslamicRep."
replace cntry_name = "Kyrgyzstan"                   if cntry_name == "KyrgyzRepublic"
replace cntry_name = "Laos"                         if cntry_name == "LaoPDR"
replace cntry_name = "Macao"                        if cntry_name == "MacaoSAR,China"
replace cntry_name = "Macedonia"                    if cntry_name == "NorthMacedonia"
replace cntry_name = "NorthKorea"                   if cntry_name == "Korea,Dem.People'sRep."
replace cntry_name = "Palestina"                    if cntry_name == "WestBankandGaza"
replace cntry_name = "RepublicofCongo"              if cntry_name == "Congo,Rep."
replace cntry_name = "Russia"                       if cntry_name == "RussianFederation"
replace cntry_name = "Slovakia"                     if cntry_name == "SlovakRepublic"
replace cntry_name = "SouthKorea"                   if cntry_name == "Korea,Rep."
replace cntry_name = "Swaziland"                    if cntry_name == "Eswatini"
replace cntry_name = "Syria"                        if cntry_name == "SyrianArabRepublic"
replace cntry_name = "Venezuela"                    if cntry_name == "Venezuela,RB"
replace cntry_name = "Yemen"                        if cntry_name == "Yemen,Rep."
* Names left without a recode (the original lines were commented out with an empty source name):
* FrenchGuiana, Guadeloupe, Martinique, Mayotte, Reunion, Taiwan, WesternSahara
* NOTE(review): presumably these have no country_wb counterpart - confirm
sort cntry_name

** Primacy rates (pop and lights), then Ginis (lights, then pop) **
* We keep the 2000 cross-section and attach the three files one after the
* other on cntry_name, dropping rows that exist only in the attached file
keep if year == 2000
foreach src in ntlpopprim ginintlrc ginipop {
	sort cntry_name
	merge cntry_name using `src'
	tabulate _merge
	drop if _merge == 2
	tabulate cntry_name if _merge == 1
	* Not enough cities in GHS, so Bhutan not there. 
	drop _merge
}
count
* 116

***** CORRELATIONS *****

*** PRIMACY MEASURES ***

* Primacy rates for the top 1, 2, 5: first those based on GHS pop (popprim),
* then those based on lights (ntl...prim). For each family we describe the
* variables and correlate them with gini.
* Recorded results, GHS pop:
*   around -0.70/-0.75 so quite high
*   -0.74/-0.72 for 2000
*   -0.73/0.76 for 2015 (NOTE(review): the second value is probably -0.76 - confirm)
*   more diversity of employment => lower primacy rates based on GHS
* Recorded results, lights:
*   around -0.70/-0.75 so quite high
*   -0.73/-0.76 for 2000
*   -0.71/0.75 for 2010 (NOTE(review): the second value is probably -0.75 - confirm)
*   more diversity of employment => lower primacy rates based on lights
foreach fam in "popprim*" "ntl*prim*" {
	describe `fam'
	correlate gini `fam'
}
* Indeed, strong correlation for pop and lights primacy rates
correlate popprim* ntl*prim*

*** GINI MEASURES ***

* Recorded results:
*   For pop: weaker = 0.22 for 2000 and 0.17 for 2015
*   For lights: weaker = 0.26 for 2000 and 0.25 for 2010
foreach fam in "ginipop*" "ginintlrc*" {
	correlate gini `fam'
}
correlate ginipop* ginintlrc*
* Quite high 

correlate primacy_rate ginipop* ginintlrc*
* Negative, but around -0.32/-0.34
correlate popprim* ginipop* 
* Negative, but around -0.37/-0.48
correlate ntl*prim* ginintlrc*00 ginintlrc*10 
* Negative, but around -0.31/-0.44


*** SUMMARY ***
* Overall primacy rate ca. 2000 (all cities) = -0.55
* --- *
* GHS primacy rates (1, 2 or 5) ca. 2000 = -0.72/-0.74
* Lights primacy rates (1, 2 or 5) ca. 2000 = -0.73/-0.76
* --- *
* GHS gini ca. 2000 = 0.22
* Lights gini ca. 2000 = 0.26
* Primacy captures inequality at the top
* Gini is a more diffuse measure of inequality 

*** DIFFERENT PATHS AND THE VARIABLES ABOVE AS A DEPENDENT VARIABLE ***
* We use the same regressions as for urban employment
* One column per dependent variable (gini, the primacy rates and the Ginis
* based on pop and lights), always controlling for urbrate
* We multiply the ginis by 100
foreach X of varlist ginipop* ginintlrc* {
	replace `X' = `X'*100
}
* NOTE(review): this erases and rebuilds the same table_gini files that the earlier gini-only section wrote - confirm the overwrite is intended
capture erase "tables/table_gini.xls"
capture erase "tables/table_gini.tex"
capture erase "tables/table_gini.txt"
local lincomset0 "nrxmean - mfgservshare"
local lincomset1 "negchg_19802000 - mfgservshare"
foreach V of varlist gini popprim* ntl*prim* ginipop* ginintlrc* {
	foreach Y in 2000 {
		foreach X in ag {
			gen nrxmean = nrx`X'_mean`Y'
			* Urbrate ctrl only *
			xi: reg `V' nrxmean mfgservshare negchg_19802000 urbrate larea larea_sq lpop lpop_sq smallisland i.type i.threshold|lthreshold_level [w=pop`Y'] if year == `Y', robust
			proglincom "`lincomset0'"
			local coeff1 = r(coefftxt)
			local se1 = r(setxt)
			proglincom "`lincomset1'"
			local coeff2 = r(coefftxt)
			local se2 = r(setxt)
			* No third linear combination is computed in this section, so the diffcoef and diffse
			* rows are written as explicit blanks instead of reading coeff3 and se3, which are
			* never set here and could carry values left over from an earlier table
			outreg2 * using "tables/table_gini.xls", keep(*mfgserv* *nrx* negchg_*) addtext(NRXGDP-MFGSERV, "`coeff1'", se1, "`se1'", DEINDU-MFGSERV, "`coeff2'", se2, "`se2'", diffcoef, "", diffse, "") se nocons coefastr bdec(2) sdec(2) adjr2 noni nolabel bracket title(Effect, "") nonotes append
			drop nrxmean
		}
	}
}
